1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/IntrinsicsRISCV.h"
40#include "llvm/Support/Debug.h"
46#include <optional>
47
48using namespace llvm;
49
50#define DEBUG_TYPE "riscv-lower"
51
52STATISTIC(NumTailCalls, "Number of tail calls");
53
54static cl::opt<unsigned> ExtensionMaxWebSize(
55 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
56 cl::desc("Give the maximum size (in number of nodes) of the web of "
57 "instructions that we will consider for VW expansion"),
58 cl::init(18));
59
60static cl::opt<bool>
61 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
62 cl::desc("Allow the formation of VW_W operations (e.g., "
63 "VWADD_W) with splat constants"),
64 cl::init(false));
65
66static cl::opt<unsigned> NumRepeatedDivisors(
67 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
68 cl::desc("Set the minimum number of repetitions of a divisor to allow "
69 "transformation to multiplications by the reciprocal"),
70 cl::init(2));
71
72static cl::opt<int>
73 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
74 cl::desc("Give the maximum number of instructions that we will "
75 "use for creating a floating-point immediate value"),
76 cl::init(2));
77
78static cl::opt<bool>
79 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
80 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
81
82RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
83 const RISCVSubtarget &STI)
84 : TargetLowering(TM), Subtarget(STI) {
85
86 RISCVABI::ABI ABI = Subtarget.getTargetABI();
87 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
88
89 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
90 !Subtarget.hasStdExtF()) {
91 errs() << "Hard-float 'f' ABI can't be used for a target that "
92 "doesn't support the F instruction set extension (ignoring "
93 "target-abi)\n";
95 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
96 !Subtarget.hasStdExtD()) {
97 errs() << "Hard-float 'd' ABI can't be used for a target that "
98 "doesn't support the D instruction set extension (ignoring "
99 "target-abi)\n";
100 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
101 }
102
103 switch (ABI) {
104 default:
105 report_fatal_error("Don't know how to lower this ABI");
114 break;
115 }
116
117 MVT XLenVT = Subtarget.getXLenVT();
118
119 // Set up the register classes.
120 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
121 if (Subtarget.is64Bit() && RV64LegalI32)
122 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
123
124 if (Subtarget.hasStdExtZfhmin())
125 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
126 if (Subtarget.hasStdExtZfbfmin())
127 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
128 if (Subtarget.hasStdExtF())
129 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
130 if (Subtarget.hasStdExtD())
131 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
132 if (Subtarget.hasStdExtZhinxmin())
133 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
134 if (Subtarget.hasStdExtZfinx())
135 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
136 if (Subtarget.hasStdExtZdinx()) {
137 if (Subtarget.is64Bit())
138 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
139 else
140 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
141 }
142
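// Scalable RVV vector types, grouped by element type. Each list runs from the
// smallest (fractional LMUL) form up to the LMUL=8 form; addRegClassForRVV
// below selects the VR, VRM2, VRM4 or VRM8 register class from the type's
// known minimum size.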
143 static const MVT::SimpleValueType BoolVecVTs[] = {
144 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
145 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
146 static const MVT::SimpleValueType IntVecVTs[] = {
147 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
148 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
149 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
150 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
151 MVT::nxv4i64, MVT::nxv8i64};
152 static const MVT::SimpleValueType F16VecVTs[] = {
153 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
154 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
155 static const MVT::SimpleValueType BF16VecVTs[] = {
156 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
157 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
158 static const MVT::SimpleValueType F32VecVTs[] = {
159 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
160 static const MVT::SimpleValueType F64VecVTs[] = {
161 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
162
163 if (Subtarget.hasVInstructions()) {
164 auto addRegClassForRVV = [this](MVT VT) {
165 // Disable the smallest fractional LMUL types if ELEN is less than
166 // RVVBitsPerBlock.
167 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
168 if (VT.getVectorMinNumElements() < MinElts)
169 return;
170
171 unsigned Size = VT.getSizeInBits().getKnownMinValue();
172 const TargetRegisterClass *RC;
173 if (Size <= RISCV::RVVBitsPerBlock)
174 RC = &RISCV::VRRegClass;
175 else if (Size == 2 * RISCV::RVVBitsPerBlock)
176 RC = &RISCV::VRM2RegClass;
177 else if (Size == 4 * RISCV::RVVBitsPerBlock)
178 RC = &RISCV::VRM4RegClass;
179 else if (Size == 8 * RISCV::RVVBitsPerBlock)
180 RC = &RISCV::VRM8RegClass;
181 else
182 llvm_unreachable("Unexpected size");
183
184 addRegisterClass(VT, RC);
185 };
186
187 for (MVT VT : BoolVecVTs)
188 addRegClassForRVV(VT);
189 for (MVT VT : IntVecVTs) {
190 if (VT.getVectorElementType() == MVT::i64 &&
191 !Subtarget.hasVInstructionsI64())
192 continue;
193 addRegClassForRVV(VT);
194 }
195
196 if (Subtarget.hasVInstructionsF16Minimal())
197 for (MVT VT : F16VecVTs)
198 addRegClassForRVV(VT);
199
200 if (Subtarget.hasVInstructionsBF16())
201 for (MVT VT : BF16VecVTs)
202 addRegClassForRVV(VT);
203
204 if (Subtarget.hasVInstructionsF32())
205 for (MVT VT : F32VecVTs)
206 addRegClassForRVV(VT);
207
208 if (Subtarget.hasVInstructionsF64())
209 for (MVT VT : F64VecVTs)
210 addRegClassForRVV(VT);
211
212 if (Subtarget.useRVVForFixedLengthVectors()) {
213 auto addRegClassForFixedVectors = [this](MVT VT) {
214 MVT ContainerVT = getContainerForFixedLengthVector(VT);
215 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
216 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
217 addRegisterClass(VT, TRI.getRegClass(RCID));
218 };
219 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
220 if (useRVVForFixedLengthVectorVT(VT))
221 addRegClassForFixedVectors(VT);
222
223 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
224 if (useRVVForFixedLengthVectorVT(VT))
225 addRegClassForFixedVectors(VT);
226 }
227 }
228
229 // Compute derived properties from the register classes.
231
233
235 MVT::i1, Promote);
236 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
238 MVT::i1, Promote);
239
240 // TODO: add all necessary setOperationAction calls.
242
245 if (RV64LegalI32 && Subtarget.is64Bit())
249 if (RV64LegalI32 && Subtarget.is64Bit())
251
258
259 if (RV64LegalI32 && Subtarget.is64Bit())
261
263
266 if (RV64LegalI32 && Subtarget.is64Bit())
268
270
272
273 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
274 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
275
276 if (Subtarget.is64Bit()) {
278
279 if (!RV64LegalI32) {
282 MVT::i32, Custom);
284 MVT::i32, Custom);
285 if (!Subtarget.hasStdExtZbb())
287 } else {
289 if (Subtarget.hasStdExtZbb()) {
292 }
293 }
295 } else {
297 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
298 nullptr);
299 setLibcallName(RTLIB::MULO_I64, nullptr);
300 }
301
302 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
304 if (RV64LegalI32 && Subtarget.is64Bit())
306 } else if (Subtarget.is64Bit()) {
308 if (!RV64LegalI32)
310 else
312 } else {
314 }
315
316 if (!Subtarget.hasStdExtM()) {
318 XLenVT, Expand);
319 if (RV64LegalI32 && Subtarget.is64Bit())
321 Promote);
322 } else if (Subtarget.is64Bit()) {
323 if (!RV64LegalI32)
325 {MVT::i8, MVT::i16, MVT::i32}, Custom);
326 }
327
328 if (RV64LegalI32 && Subtarget.is64Bit()) {
332 Expand);
333 }
334
337 Expand);
338
340 Custom);
341
342 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
343 if (!RV64LegalI32 && Subtarget.is64Bit())
345 } else if (Subtarget.hasVendorXTHeadBb()) {
346 if (Subtarget.is64Bit())
349 } else if (Subtarget.hasVendorXCVbitmanip()) {
351 } else {
353 if (RV64LegalI32 && Subtarget.is64Bit())
355 }
356
357 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
358 // pattern match it directly in isel.
360 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
361 Subtarget.hasVendorXTHeadBb())
362 ? Legal
363 : Expand);
364 if (RV64LegalI32 && Subtarget.is64Bit())
366 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367 Subtarget.hasVendorXTHeadBb())
368 ? Promote
369 : Expand);
370
371
372 if (Subtarget.hasVendorXCVbitmanip()) {
374 } else {
375 // Zbkb can use rev8+brev8 to implement bitreverse.
377 Subtarget.hasStdExtZbkb() ? Custom : Expand);
378 }
379
380 if (Subtarget.hasStdExtZbb()) {
382 Legal);
383 if (RV64LegalI32 && Subtarget.is64Bit())
385 Promote);
386
387 if (Subtarget.is64Bit()) {
388 if (RV64LegalI32)
390 else
392 }
393 } else if (!Subtarget.hasVendorXCVbitmanip()) {
395 if (RV64LegalI32 && Subtarget.is64Bit())
397 }
398
399 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
400 Subtarget.hasVendorXCVbitmanip()) {
401 // We need the custom lowering to make sure that the resulting sequence
402 // for the 32bit case is efficient on 64bit targets.
403 if (Subtarget.is64Bit()) {
404 if (RV64LegalI32) {
406 Subtarget.hasStdExtZbb() ? Legal : Promote);
407 if (!Subtarget.hasStdExtZbb())
409 } else
411 }
412 } else {
414 if (RV64LegalI32 && Subtarget.is64Bit())
416 }
417
418 if (!RV64LegalI32 && Subtarget.is64Bit() &&
419 !Subtarget.hasShortForwardBranchOpt())
421
422 // We can use PseudoCCSUB to implement ABS.
423 if (Subtarget.hasShortForwardBranchOpt())
425
426 if (!Subtarget.hasVendorXTHeadCondMov()) {
428 if (RV64LegalI32 && Subtarget.is64Bit())
430 }
431
432 static const unsigned FPLegalNodeTypes[] = {
439
440 static const ISD::CondCode FPCCToExpand[] = {
444
445 static const unsigned FPOpToExpand[] = {
447 ISD::FREM};
448
449 static const unsigned FPRndMode[] = {
452
453 if (Subtarget.hasStdExtZfhminOrZhinxmin())
455
456 static const unsigned ZfhminZfbfminPromoteOps[] = {
466
467 if (Subtarget.hasStdExtZfbfmin()) {
476 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
478 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
479 // DAGCombiner::visitFP_ROUND probably needs improvements first.
481 }
482
483 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
484 if (Subtarget.hasStdExtZfhOrZhinx()) {
485 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
486 setOperationAction(FPRndMode, MVT::f16,
487 Subtarget.hasStdExtZfa() ? Legal : Custom);
490 } else {
491 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
494 MVT::f16, Legal);
495 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
496 // DAGCombiner::visitFP_ROUND probably needs improvements first.
498 }
499
502 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
505
507 Subtarget.hasStdExtZfa() ? Legal : Promote);
512 MVT::f16, Promote);
513
514 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
515 // complete support for all operations in LegalizeDAG.
520 MVT::f16, Promote);
521
522 // We need to custom promote this.
523 if (Subtarget.is64Bit())
525
526 if (!Subtarget.hasStdExtZfa())
528 }
529
530 if (Subtarget.hasStdExtFOrZfinx()) {
531 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
532 setOperationAction(FPRndMode, MVT::f32,
533 Subtarget.hasStdExtZfa() ? Legal : Custom);
534 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
538 setOperationAction(FPOpToExpand, MVT::f32, Expand);
539 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
540 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
541 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
542 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
546 Subtarget.isSoftFPABI() ? LibCall : Custom);
549
550 if (Subtarget.hasStdExtZfa())
552 else
554 }
555
556 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
558
559 if (Subtarget.hasStdExtDOrZdinx()) {
560 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
561
562 if (!Subtarget.is64Bit())
564
565 if (Subtarget.hasStdExtZfa()) {
566 setOperationAction(FPRndMode, MVT::f64, Legal);
568 } else {
569 if (Subtarget.is64Bit())
570 setOperationAction(FPRndMode, MVT::f64, Custom);
571
573 }
574
577 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
582 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
583 setOperationAction(FPOpToExpand, MVT::f64, Expand);
584 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
585 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
586 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
587 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
591 Subtarget.isSoftFPABI() ? LibCall : Custom);
594 }
595
596 if (Subtarget.is64Bit()) {
599 MVT::i32, Custom);
601 }
602
603 if (Subtarget.hasStdExtFOrZfinx()) {
605 Custom);
606
609 XLenVT, Legal);
610
611 if (RV64LegalI32 && Subtarget.is64Bit())
614 MVT::i32, Legal);
615
618 }
619
622 XLenVT, Custom);
623
625
626 if (Subtarget.is64Bit())
628
629 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
630 // Unfortunately this can't be determined just from the ISA naming string.
632 Subtarget.is64Bit() ? Legal : Custom);
634 Subtarget.is64Bit() ? Legal : Custom);
635
638 if (Subtarget.is64Bit())
640
641 if (Subtarget.hasStdExtZicbop()) {
643 }
644
645 if (Subtarget.hasStdExtA()) {
647 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
649 else
651 } else if (Subtarget.hasForcedAtomics()) {
653 } else {
655 }
656
658
660
661 if (Subtarget.hasVInstructions()) {
663
665 if (RV64LegalI32 && Subtarget.is64Bit())
667
668 // RVV intrinsics may have illegal operands.
669 // We also need to custom legalize vmv.x.s.
672 {MVT::i8, MVT::i16}, Custom);
673 if (Subtarget.is64Bit())
675 MVT::i32, Custom);
676 else
678 MVT::i64, Custom);
679
681 MVT::Other, Custom);
682
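// VP (vector-predicated) opcodes that are custom-lowered for each legal RVV
// integer and floating-point vector type in the loops below.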
683 static const unsigned IntegerVPOps[] = {
684 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
685 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
686 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
687 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
688 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
689 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
690 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
691 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
692 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
693 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
694 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
695 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
696 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
697 ISD::VP_USUBSAT};
698
699 static const unsigned FloatingPointVPOps[] = {
700 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
701 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
702 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
703 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
704 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
705 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
706 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
707 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
708 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
709 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
710 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
711 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
712 ISD::EXPERIMENTAL_VP_SPLICE};
713
714 static const unsigned IntegerVecReduceOps[] = {
718
719 static const unsigned FloatingPointVecReduceOps[] = {
722
723 if (!Subtarget.is64Bit()) {
724 // We must custom-lower certain vXi64 operations on RV32 due to the vector
725 // element type being illegal.
727 MVT::i64, Custom);
728
729 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
730
731 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
732 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
733 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
734 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
735 MVT::i64, Custom);
736 }
737
738 for (MVT VT : BoolVecVTs) {
739 if (!isTypeLegal(VT))
740 continue;
741
743
744 // Mask VTs are custom-expanded into a series of standard nodes
748 VT, Custom);
749
751 Custom);
752
755 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
756 Expand);
757
758 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
759
762 Custom);
763
765 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
766 Custom);
767
768 // RVV has native int->float & float->int conversions where the
769 // element type sizes are within one power-of-two of each other. Any
770 // wider distances between type sizes have to be lowered as sequences
771 // which progressively narrow the gap in stages.
776 VT, Custom);
778 Custom);
779
780 // Expand all extending loads to types larger than this, and truncating
781 // stores from types larger than this.
783 setTruncStoreAction(VT, OtherVT, Expand);
785 OtherVT, Expand);
786 }
787
788 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
789 ISD::VP_TRUNCATE, ISD::VP_SETCC},
790 VT, Custom);
791
794
796
797 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
798 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
799
802 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
803 }
804
805 for (MVT VT : IntVecVTs) {
806 if (!isTypeLegal(VT))
807 continue;
808
811
812 // Vectors implement MULHS/MULHU.
814
815 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
816 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
818
820 Legal);
821
823
824 // Custom-lower extensions and truncations from/to mask types.
826 VT, Custom);
827
828 // RVV has native int->float & float->int conversions where the
829 // element type sizes are within one power-of-two of each other. Any
830 // wider distances between type sizes have to be lowered as sequences
831 // which progressively narrow the gap in stages.
836 VT, Custom);
838 Custom);
841 VT, Legal);
842
843 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
844 // nodes which truncate by one power of two at a time.
846
847 // Custom-lower insert/extract operations to simplify patterns.
849 Custom);
850
851 // Custom-lower reduction operations to set up the corresponding custom
852 // nodes' operands.
853 setOperationAction(IntegerVecReduceOps, VT, Custom);
854
855 setOperationAction(IntegerVPOps, VT, Custom);
856
858
860 VT, Custom);
861
863 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
864 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
865 VT, Custom);
866
869 VT, Custom);
870
873
875
877 setTruncStoreAction(VT, OtherVT, Expand);
879 OtherVT, Expand);
880 }
881
884
885 // Splice
887
888 if (Subtarget.hasStdExtZvkb()) {
890 setOperationAction(ISD::VP_BSWAP, VT, Custom);
891 } else {
892 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
894 }
895
896 if (Subtarget.hasStdExtZvbb()) {
898 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
899 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
900 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
901 VT, Custom);
902 } else {
903 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
905 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
906 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
907 VT, Expand);
908
909 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element values of VT
910 // fit in the range of f32.
911 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
912 if (isTypeLegal(FloatVT)) {
914 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
915 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
916 VT, Custom);
917 }
918 }
919 }
920
921 // Expand various CCs to best match the RVV ISA, which natively supports UNE
922 // but no other unordered comparisons, and supports all ordered comparisons
923 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
924 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
925 // and we pattern-match those back to the "original", swapping operands once
926 // more. This way we catch both operations and both "vf" and "fv" forms with
927 // fewer patterns.
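 // For example, (setcc ogt %a, %b) is expanded here to (setcc olt %b, %a);
 // isel then recognizes the swapped form, so one set of patterns covers both
 // the "vf" and "fv" variants.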
928 static const ISD::CondCode VFPCCToExpand[] = {
932 };
933
934 // TODO: support more ops.
935 static const unsigned ZvfhminPromoteOps[] = {
943
944 // TODO: support more vp ops.
945 static const unsigned ZvfhminPromoteVPOps[] = {
946 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
947 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
948 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
949 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
950 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
951 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
952 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
953 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
954 ISD::VP_FMAXIMUM};
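// With only Zvfhmin available, the operations above cannot be performed
// directly on f16 vectors; they are either custom-split (nxv32f16) or
// promoted to the corresponding f32 vector type further below.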
955
956 // Sets common operation actions on RVV floating-point vector types.
957 const auto SetCommonVFPActions = [&](MVT VT) {
959 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
960 // sizes are within one power-of-two of each other. Therefore conversions
961 // between vXf16 and vXf64 must be lowered as sequences which convert via
962 // vXf32.
965 // Custom-lower insert/extract operations to simplify patterns.
967 Custom);
968 // Expand various condition codes (explained above).
969 setCondCodeAction(VFPCCToExpand, VT, Expand);
970
973
977 VT, Custom);
978
979 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
980
981 // Expand FP operations that need libcalls.
993
995
997
999 VT, Custom);
1000
1002 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1003 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1004 VT, Custom);
1005
1008
1011 VT, Custom);
1012
1015
1017
1018 setOperationAction(FloatingPointVPOps, VT, Custom);
1019
1021 Custom);
1024 VT, Legal);
1029 VT, Custom);
1030 };
1031
1032 // Sets common extload/truncstore actions on RVV floating-point vector
1033 // types.
1034 const auto SetCommonVFPExtLoadTruncStoreActions =
1035 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1036 for (auto SmallVT : SmallerVTs) {
1037 setTruncStoreAction(VT, SmallVT, Expand);
1038 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1039 }
1040 };
1041
1042 if (Subtarget.hasVInstructionsF16()) {
1043 for (MVT VT : F16VecVTs) {
1044 if (!isTypeLegal(VT))
1045 continue;
1046 SetCommonVFPActions(VT);
1047 }
1048 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1049 for (MVT VT : F16VecVTs) {
1050 if (!isTypeLegal(VT))
1051 continue;
1054 Custom);
1055 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1056 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1057 Custom);
1060 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1061 VT, Custom);
1064 VT, Custom);
1066 // load/store
1068
1069 // Custom-split nxv32f16 since nxv32f32 is not legal.
1070 if (VT == MVT::nxv32f16) {
1071 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1072 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1073 continue;
1074 }
1075 // Add more promote ops.
1076 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1077 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1078 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1079 }
1080 }
1081
1082 if (Subtarget.hasVInstructionsF32()) {
1083 for (MVT VT : F32VecVTs) {
1084 if (!isTypeLegal(VT))
1085 continue;
1086 SetCommonVFPActions(VT);
1087 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1088 }
1089 }
1090
1091 if (Subtarget.hasVInstructionsF64()) {
1092 for (MVT VT : F64VecVTs) {
1093 if (!isTypeLegal(VT))
1094 continue;
1095 SetCommonVFPActions(VT);
1096 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1097 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1098 }
1099 }
1100
1101 if (Subtarget.useRVVForFixedLengthVectors()) {
1102 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
1103 if (!useRVVForFixedLengthVectorVT(VT))
1104 continue;
1105
1106 // By default everything must be expanded.
1107 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1110 setTruncStoreAction(VT, OtherVT, Expand);
1112 OtherVT, Expand);
1113 }
1114
1115 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1116 // expansion to a build_vector of 0s.
1118
1119 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1121 Custom);
1122
1124 Custom);
1125
1127 VT, Custom);
1128
1130
1132
1134
1136
1138
1140
1143 Custom);
1144
1146 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1147 Custom);
1148
1150 {
1159 },
1160 VT, Custom);
1162 Custom);
1163
1165
1166 // The operations below differ between mask vectors and other vectors.
1167 if (VT.getVectorElementType() == MVT::i1) {
1168 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1169 ISD::OR, ISD::XOR},
1170 VT, Custom);
1171
1172 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1173 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1174 VT, Custom);
1175
1176 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1177 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1178 continue;
1179 }
1180
1181 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1182 // it before type legalization for i64 vectors on RV32. It will then be
1183 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1184 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1185 // improvements first.
1186 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1189 }
1190
1193
1194 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1195 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1196 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1197 ISD::VP_SCATTER},
1198 VT, Custom);
1199
1203 VT, Custom);
1204
1207
1209
1210 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1211 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1213
1216 VT, Custom);
1217
1220
1223
1224 // Custom-lower reduction operations to set up the corresponding custom
1225 // nodes' operands.
1229 VT, Custom);
1230
1231 setOperationAction(IntegerVPOps, VT, Custom);
1232
1233 if (Subtarget.hasStdExtZvkb())
1235
1236 if (Subtarget.hasStdExtZvbb()) {
1239 VT, Custom);
1240 } else {
1241 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element values of VT
1242 // fit in the range of f32.
1243 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1244 if (isTypeLegal(FloatVT))
1247 Custom);
1248 }
1249 }
1250
1251 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
1252 // There are no extending loads or truncating stores.
1253 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1254 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1255 setTruncStoreAction(VT, InnerVT, Expand);
1256 }
1257
1258 if (!useRVVForFixedLengthVectorVT(VT))
1259 continue;
1260
1261 // By default everything must be expanded.
1262 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1264
1265 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1266 // expansion to a build_vector of 0s.
1268
1269 if (VT.getVectorElementType() == MVT::f16 &&
1270 !Subtarget.hasVInstructionsF16()) {
1273 Custom);
1274 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1276 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1277 Custom);
1279 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1280 VT, Custom);
1283 VT, Custom);
1286 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1287 // Don't promote f16 vector operations to f32 if f32 vector type is
1288 // not legal.
1289 // TODO: could split the f16 vector into two vectors and do promotion.
1290 if (!isTypeLegal(F32VecVT))
1291 continue;
1292 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1293 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1294 continue;
1295 }
1296
1297 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1299 Custom);
1300
1304 VT, Custom);
1305
1308 VT, Custom);
1309
1310 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1311 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1312 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1313 ISD::VP_SCATTER},
1314 VT, Custom);
1315
1320 VT, Custom);
1321
1323
1326 VT, Custom);
1327
1328 setCondCodeAction(VFPCCToExpand, VT, Expand);
1329
1333
1335
1336 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1337
1338 setOperationAction(FloatingPointVPOps, VT, Custom);
1339
1341 Custom);
1348 VT, Custom);
1349 }
1350
1351 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1352 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1353 Custom);
1354 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1356 if (Subtarget.hasStdExtFOrZfinx())
1358 if (Subtarget.hasStdExtDOrZdinx())
1360 }
1361 }
1362
1363 if (Subtarget.hasStdExtA()) {
1365 if (RV64LegalI32 && Subtarget.is64Bit())
1367 }
1368
1369 if (Subtarget.hasForcedAtomics()) {
1370 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1376 XLenVT, LibCall);
1377 }
1378
1379 if (Subtarget.hasVendorXTHeadMemIdx()) {
1380 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1381 setIndexedLoadAction(im, MVT::i8, Legal);
1382 setIndexedStoreAction(im, MVT::i8, Legal);
1383 setIndexedLoadAction(im, MVT::i16, Legal);
1384 setIndexedStoreAction(im, MVT::i16, Legal);
1385 setIndexedLoadAction(im, MVT::i32, Legal);
1386 setIndexedStoreAction(im, MVT::i32, Legal);
1387
1388 if (Subtarget.is64Bit()) {
1389 setIndexedLoadAction(im, MVT::i64, Legal);
1390 setIndexedStoreAction(im, MVT::i64, Legal);
1391 }
1392 }
1393 }
1394
1395 // Function alignments.
1396 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1397 setMinFunctionAlignment(FunctionAlignment);
1398 // Set preferred alignments.
1401
1405 if (Subtarget.is64Bit())
1407
1408 if (Subtarget.hasStdExtFOrZfinx())
1410
1411 if (Subtarget.hasStdExtZbb())
1413
1414 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1416
1417 if (Subtarget.hasStdExtZbkb())
1419 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1421 if (Subtarget.hasStdExtFOrZfinx())
1424 if (Subtarget.hasVInstructions())
1426 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1429 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1431 if (Subtarget.hasVendorXTHeadMemPair())
1433 if (Subtarget.useRVVForFixedLengthVectors())
1435
1436 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1437 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1438
1439 // Disable strict node mutation.
1440 IsStrictFPEnabled = true;
1441}
1442
1443EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1444 LLVMContext &Context,
1445 EVT VT) const {
1446 if (!VT.isVector())
1447 return getPointerTy(DL);
1448 if (Subtarget.hasVInstructions() &&
1449 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1450 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1451 return VT.changeVectorElementTypeToInteger();
1452}
1453
1454MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1455 return Subtarget.getXLenVT();
1456}
1457
1458// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1459bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1460 unsigned VF,
1461 bool IsScalable) const {
1462 if (!Subtarget.hasVInstructions())
1463 return true;
1464
1465 if (!IsScalable)
1466 return true;
1467
1468 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1469 return true;
1470
1471 // Don't allow VF=1 if those types aren't legal.
1472 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1473 return true;
1474
1475 // VLEN=32 support is incomplete.
1476 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1477 return true;
1478
1479 // The maximum VF is for the smallest element width with LMUL=8.
1480 // VF must be a power of 2.
1481 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
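 // (RVVBitsPerBlock is 64, so MaxVF evaluates to (64 / 8) * 8 = 64.)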
1482 return VF > MaxVF || !isPowerOf2_32(VF);
1483}
1484
1485bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1486 const CallInst &I,
1487 MachineFunction &MF,
1488 unsigned Intrinsic) const {
1489 auto &DL = I.getModule()->getDataLayout();
1490
1491 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1492 bool IsUnitStrided, bool UsePtrVal = false) {
1493 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1494 // We can't use ptrVal if the intrinsic can access memory before the
1495 // pointer. This means we can't use it for strided or indexed intrinsics.
1496 if (UsePtrVal)
1497 Info.ptrVal = I.getArgOperand(PtrOp);
1498 else
1499 Info.fallbackAddressSpace =
1500 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1501 Type *MemTy;
1502 if (IsStore) {
1503 // Store value is the first operand.
1504 MemTy = I.getArgOperand(0)->getType();
1505 } else {
1506 // Use return type. If it's segment load, return type is a struct.
1507 MemTy = I.getType();
1508 if (MemTy->isStructTy())
1509 MemTy = MemTy->getStructElementType(0);
1510 }
1511 if (!IsUnitStrided)
1512 MemTy = MemTy->getScalarType();
1513
1514 Info.memVT = getValueType(DL, MemTy);
1515 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1517 Info.flags |=
1519 return true;
1520 };
1521
1522 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1524
1526 switch (Intrinsic) {
1527 default:
1528 return false;
1529 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1530 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1531 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1532 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1533 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1534 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1535 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1536 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1537 case Intrinsic::riscv_masked_cmpxchg_i32:
1539 Info.memVT = MVT::i32;
1540 Info.ptrVal = I.getArgOperand(0);
1541 Info.offset = 0;
1542 Info.align = Align(4);
1545 return true;
1546 case Intrinsic::riscv_masked_strided_load:
1547 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1548 /*IsUnitStrided*/ false);
1549 case Intrinsic::riscv_masked_strided_store:
1550 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1551 /*IsUnitStrided*/ false);
1552 case Intrinsic::riscv_seg2_load:
1553 case Intrinsic::riscv_seg3_load:
1554 case Intrinsic::riscv_seg4_load:
1555 case Intrinsic::riscv_seg5_load:
1556 case Intrinsic::riscv_seg6_load:
1557 case Intrinsic::riscv_seg7_load:
1558 case Intrinsic::riscv_seg8_load:
1559 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1560 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1561 case Intrinsic::riscv_seg2_store:
1562 case Intrinsic::riscv_seg3_store:
1563 case Intrinsic::riscv_seg4_store:
1564 case Intrinsic::riscv_seg5_store:
1565 case Intrinsic::riscv_seg6_store:
1566 case Intrinsic::riscv_seg7_store:
1567 case Intrinsic::riscv_seg8_store:
1568 // Operands are (vec, ..., vec, ptr, vl)
1569 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1570 /*IsStore*/ true,
1571 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1572 case Intrinsic::riscv_vle:
1573 case Intrinsic::riscv_vle_mask:
1574 case Intrinsic::riscv_vleff:
1575 case Intrinsic::riscv_vleff_mask:
1576 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1577 /*IsStore*/ false,
1578 /*IsUnitStrided*/ true,
1579 /*UsePtrVal*/ true);
1580 case Intrinsic::riscv_vse:
1581 case Intrinsic::riscv_vse_mask:
1582 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1583 /*IsStore*/ true,
1584 /*IsUnitStrided*/ true,
1585 /*UsePtrVal*/ true);
1586 case Intrinsic::riscv_vlse:
1587 case Intrinsic::riscv_vlse_mask:
1588 case Intrinsic::riscv_vloxei:
1589 case Intrinsic::riscv_vloxei_mask:
1590 case Intrinsic::riscv_vluxei:
1591 case Intrinsic::riscv_vluxei_mask:
1592 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1593 /*IsStore*/ false,
1594 /*IsUnitStrided*/ false);
1595 case Intrinsic::riscv_vsse:
1596 case Intrinsic::riscv_vsse_mask:
1597 case Intrinsic::riscv_vsoxei:
1598 case Intrinsic::riscv_vsoxei_mask:
1599 case Intrinsic::riscv_vsuxei:
1600 case Intrinsic::riscv_vsuxei_mask:
1601 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1602 /*IsStore*/ true,
1603 /*IsUnitStrided*/ false);
1604 case Intrinsic::riscv_vlseg2:
1605 case Intrinsic::riscv_vlseg3:
1606 case Intrinsic::riscv_vlseg4:
1607 case Intrinsic::riscv_vlseg5:
1608 case Intrinsic::riscv_vlseg6:
1609 case Intrinsic::riscv_vlseg7:
1610 case Intrinsic::riscv_vlseg8:
1611 case Intrinsic::riscv_vlseg2ff:
1612 case Intrinsic::riscv_vlseg3ff:
1613 case Intrinsic::riscv_vlseg4ff:
1614 case Intrinsic::riscv_vlseg5ff:
1615 case Intrinsic::riscv_vlseg6ff:
1616 case Intrinsic::riscv_vlseg7ff:
1617 case Intrinsic::riscv_vlseg8ff:
1618 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1619 /*IsStore*/ false,
1620 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1621 case Intrinsic::riscv_vlseg2_mask:
1622 case Intrinsic::riscv_vlseg3_mask:
1623 case Intrinsic::riscv_vlseg4_mask:
1624 case Intrinsic::riscv_vlseg5_mask:
1625 case Intrinsic::riscv_vlseg6_mask:
1626 case Intrinsic::riscv_vlseg7_mask:
1627 case Intrinsic::riscv_vlseg8_mask:
1628 case Intrinsic::riscv_vlseg2ff_mask:
1629 case Intrinsic::riscv_vlseg3ff_mask:
1630 case Intrinsic::riscv_vlseg4ff_mask:
1631 case Intrinsic::riscv_vlseg5ff_mask:
1632 case Intrinsic::riscv_vlseg6ff_mask:
1633 case Intrinsic::riscv_vlseg7ff_mask:
1634 case Intrinsic::riscv_vlseg8ff_mask:
1635 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1636 /*IsStore*/ false,
1637 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1638 case Intrinsic::riscv_vlsseg2:
1639 case Intrinsic::riscv_vlsseg3:
1640 case Intrinsic::riscv_vlsseg4:
1641 case Intrinsic::riscv_vlsseg5:
1642 case Intrinsic::riscv_vlsseg6:
1643 case Intrinsic::riscv_vlsseg7:
1644 case Intrinsic::riscv_vlsseg8:
1645 case Intrinsic::riscv_vloxseg2:
1646 case Intrinsic::riscv_vloxseg3:
1647 case Intrinsic::riscv_vloxseg4:
1648 case Intrinsic::riscv_vloxseg5:
1649 case Intrinsic::riscv_vloxseg6:
1650 case Intrinsic::riscv_vloxseg7:
1651 case Intrinsic::riscv_vloxseg8:
1652 case Intrinsic::riscv_vluxseg2:
1653 case Intrinsic::riscv_vluxseg3:
1654 case Intrinsic::riscv_vluxseg4:
1655 case Intrinsic::riscv_vluxseg5:
1656 case Intrinsic::riscv_vluxseg6:
1657 case Intrinsic::riscv_vluxseg7:
1658 case Intrinsic::riscv_vluxseg8:
1659 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1660 /*IsStore*/ false,
1661 /*IsUnitStrided*/ false);
1662 case Intrinsic::riscv_vlsseg2_mask:
1663 case Intrinsic::riscv_vlsseg3_mask:
1664 case Intrinsic::riscv_vlsseg4_mask:
1665 case Intrinsic::riscv_vlsseg5_mask:
1666 case Intrinsic::riscv_vlsseg6_mask:
1667 case Intrinsic::riscv_vlsseg7_mask:
1668 case Intrinsic::riscv_vlsseg8_mask:
1669 case Intrinsic::riscv_vloxseg2_mask:
1670 case Intrinsic::riscv_vloxseg3_mask:
1671 case Intrinsic::riscv_vloxseg4_mask:
1672 case Intrinsic::riscv_vloxseg5_mask:
1673 case Intrinsic::riscv_vloxseg6_mask:
1674 case Intrinsic::riscv_vloxseg7_mask:
1675 case Intrinsic::riscv_vloxseg8_mask:
1676 case Intrinsic::riscv_vluxseg2_mask:
1677 case Intrinsic::riscv_vluxseg3_mask:
1678 case Intrinsic::riscv_vluxseg4_mask:
1679 case Intrinsic::riscv_vluxseg5_mask:
1680 case Intrinsic::riscv_vluxseg6_mask:
1681 case Intrinsic::riscv_vluxseg7_mask:
1682 case Intrinsic::riscv_vluxseg8_mask:
1683 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1684 /*IsStore*/ false,
1685 /*IsUnitStrided*/ false);
1686 case Intrinsic::riscv_vsseg2:
1687 case Intrinsic::riscv_vsseg3:
1688 case Intrinsic::riscv_vsseg4:
1689 case Intrinsic::riscv_vsseg5:
1690 case Intrinsic::riscv_vsseg6:
1691 case Intrinsic::riscv_vsseg7:
1692 case Intrinsic::riscv_vsseg8:
1693 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1694 /*IsStore*/ true,
1695 /*IsUnitStrided*/ false);
1696 case Intrinsic::riscv_vsseg2_mask:
1697 case Intrinsic::riscv_vsseg3_mask:
1698 case Intrinsic::riscv_vsseg4_mask:
1699 case Intrinsic::riscv_vsseg5_mask:
1700 case Intrinsic::riscv_vsseg6_mask:
1701 case Intrinsic::riscv_vsseg7_mask:
1702 case Intrinsic::riscv_vsseg8_mask:
1703 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1704 /*IsStore*/ true,
1705 /*IsUnitStrided*/ false);
1706 case Intrinsic::riscv_vssseg2:
1707 case Intrinsic::riscv_vssseg3:
1708 case Intrinsic::riscv_vssseg4:
1709 case Intrinsic::riscv_vssseg5:
1710 case Intrinsic::riscv_vssseg6:
1711 case Intrinsic::riscv_vssseg7:
1712 case Intrinsic::riscv_vssseg8:
1713 case Intrinsic::riscv_vsoxseg2:
1714 case Intrinsic::riscv_vsoxseg3:
1715 case Intrinsic::riscv_vsoxseg4:
1716 case Intrinsic::riscv_vsoxseg5:
1717 case Intrinsic::riscv_vsoxseg6:
1718 case Intrinsic::riscv_vsoxseg7:
1719 case Intrinsic::riscv_vsoxseg8:
1720 case Intrinsic::riscv_vsuxseg2:
1721 case Intrinsic::riscv_vsuxseg3:
1722 case Intrinsic::riscv_vsuxseg4:
1723 case Intrinsic::riscv_vsuxseg5:
1724 case Intrinsic::riscv_vsuxseg6:
1725 case Intrinsic::riscv_vsuxseg7:
1726 case Intrinsic::riscv_vsuxseg8:
1727 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1728 /*IsStore*/ true,
1729 /*IsUnitStrided*/ false);
1730 case Intrinsic::riscv_vssseg2_mask:
1731 case Intrinsic::riscv_vssseg3_mask:
1732 case Intrinsic::riscv_vssseg4_mask:
1733 case Intrinsic::riscv_vssseg5_mask:
1734 case Intrinsic::riscv_vssseg6_mask:
1735 case Intrinsic::riscv_vssseg7_mask:
1736 case Intrinsic::riscv_vssseg8_mask:
1737 case Intrinsic::riscv_vsoxseg2_mask:
1738 case Intrinsic::riscv_vsoxseg3_mask:
1739 case Intrinsic::riscv_vsoxseg4_mask:
1740 case Intrinsic::riscv_vsoxseg5_mask:
1741 case Intrinsic::riscv_vsoxseg6_mask:
1742 case Intrinsic::riscv_vsoxseg7_mask:
1743 case Intrinsic::riscv_vsoxseg8_mask:
1744 case Intrinsic::riscv_vsuxseg2_mask:
1745 case Intrinsic::riscv_vsuxseg3_mask:
1746 case Intrinsic::riscv_vsuxseg4_mask:
1747 case Intrinsic::riscv_vsuxseg5_mask:
1748 case Intrinsic::riscv_vsuxseg6_mask:
1749 case Intrinsic::riscv_vsuxseg7_mask:
1750 case Intrinsic::riscv_vsuxseg8_mask:
1751 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1752 /*IsStore*/ true,
1753 /*IsUnitStrided*/ false);
1754 }
1755}
1756
1757bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1758 const AddrMode &AM, Type *Ty,
1759 unsigned AS,
1760 Instruction *I) const {
1761 // No global is ever allowed as a base.
1762 if (AM.BaseGV)
1763 return false;
1764
1765 // RVV instructions only support register addressing.
1766 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1767 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1768
1769 // Require a 12-bit signed offset.
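 // (i.e. immediate offsets in the range [-2048, 2047])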
1770 if (!isInt<12>(AM.BaseOffs))
1771 return false;
1772
1773 switch (AM.Scale) {
1774 case 0: // "r+i" or just "i", depending on HasBaseReg.
1775 break;
1776 case 1:
1777 if (!AM.HasBaseReg) // allow "r+i".
1778 break;
1779 return false; // disallow "r+r" or "r+r+i".
1780 default:
1781 return false;
1782 }
1783
1784 return true;
1785}
1786
1787bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1788 return isInt<12>(Imm);
1789}
1790
1791bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1792 return isInt<12>(Imm);
1793}
1794
1795// On RV32, 64-bit integers are split into their high and low parts and held
1796// in two different registers, so the trunc is free since the low register can
1797// just be used.
1798// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1799// isTruncateFree?
1800bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1801 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1802 return false;
1803 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1804 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1805 return (SrcBits == 64 && DestBits == 32);
1806}
1807
1808bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1809 // We consider i64->i32 free on RV64 since we have good selection of W
1810 // instructions that make promoting operations back to i64 free in many cases.
1811 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1812 !DstVT.isInteger())
1813 return false;
1814 unsigned SrcBits = SrcVT.getSizeInBits();
1815 unsigned DestBits = DstVT.getSizeInBits();
1816 return (SrcBits == 64 && DestBits == 32);
1817}
1818
1819bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1820 // Zexts are free if they can be combined with a load.
1821 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1822 // poorly with type legalization of compares preferring sext.
1823 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1824 EVT MemVT = LD->getMemoryVT();
1825 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1826 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1827 LD->getExtensionType() == ISD::ZEXTLOAD))
1828 return true;
1829 }
1830
1831 return TargetLowering::isZExtFree(Val, VT2);
1832}
1833
1834bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1835 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1836}
1837
1838bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
1839 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1840}
1841
1842bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1843 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1844}
1845
1846bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1847 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1848 Subtarget.hasVendorXCVbitmanip();
1849}
1850
1851bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
1852 const Instruction &AndI) const {
1853 // We expect to be able to match a bit extraction instruction if the Zbs
1854 // extension is supported and the mask is a power of two. However, we
1855 // conservatively return false if the mask would fit in an ANDI instruction,
1856 // on the basis that it's possible the sinking+duplication of the AND in
1857 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1858 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
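 // For example, the mask 0x1000 does not fit an ANDI immediate, so the bit
 // extract is worthwhile; the mask 0x4 fits ANDI, so ANDI+BNEZ is already
 // optimal and we return false below.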
1859 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1860 return false;
1861 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1862 if (!Mask)
1863 return false;
1864 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1865}
1866
1867bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
1868 EVT VT = Y.getValueType();
1869
1870 // FIXME: Support vectors once we have tests.
1871 if (VT.isVector())
1872 return false;
1873
1874 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1875 !isa<ConstantSDNode>(Y);
1876}
1877
1878bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
1879 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1880 if (Subtarget.hasStdExtZbs())
1881 return X.getValueType().isScalarInteger();
1882 auto *C = dyn_cast<ConstantSDNode>(Y);
1883 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1884 if (Subtarget.hasVendorXTHeadBs())
1885 return C != nullptr;
1886 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
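 // (ANDI's 12-bit signed immediate can encode 1 << 10 but not 1 << 11, so
 // only bit positions 0-10 are accepted here.)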
1887 return C && C->getAPIntValue().ule(10);
1888}
1889
1891 EVT VT) const {
1892 // Only enable for rvv.
1893 if (!VT.isVector() || !Subtarget.hasVInstructions())
1894 return false;
1895
1896 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1897 return false;
1898
1899 return true;
1900}
1901
1902bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
1903 Type *Ty) const {
1904 assert(Ty->isIntegerTy());
1905
1906 unsigned BitSize = Ty->getIntegerBitWidth();
1907 if (BitSize > Subtarget.getXLen())
1908 return false;
1909
1910 // Fast path, assume 32-bit immediates are cheap.
1911 int64_t Val = Imm.getSExtValue();
1912 if (isInt<32>(Val))
1913 return true;
1914
1915 // A constant pool entry may be more aligned than the load we're trying to
1916 // replace. If we don't support unaligned scalar mem, prefer the constant
1917 // pool.
1918 // TODO: Can the caller pass down the alignment?
1919 if (!Subtarget.hasFastUnalignedAccess())
1920 return true;
1921
1922 // Prefer to keep the load if it would require many instructions.
1923 // This uses the same threshold we use for constant pools but doesn't
1924 // check useConstantPoolForLargeInts.
1925 // TODO: Should we keep the load only when we're definitely going to emit a
1926 // constant pool?
1927
1929 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1930}
1931
1935 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1936 SelectionDAG &DAG) const {
1937 // One interesting pattern that we'd want to form is 'bit extract':
1938 // ((1 >> Y) & 1) ==/!= 0
1939 // But we also need to be careful not to try to reverse that fold.
1940
1941 // Is this '((1 >> Y) & 1)'?
1942 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1943 return false; // Keep the 'bit extract' pattern.
1944
1945 // Will this be '((1 >> Y) & 1)' after the transform?
1946 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1947 return true; // Do form the 'bit extract' pattern.
1948
1949 // If 'X' is a constant, and we transform, then we will immediately
1950 // try to undo the fold, thus causing endless combine loop.
1951 // So only do the transform if X is not a constant. This matches the default
1952 // implementation of this function.
1953 return !XC;
1954}
1955
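// Returns true if a vector instruction exists that lets the given operand of
// the scalar opcode be a splatted scalar (a .vx/.vi/.vf form). Shifts,
// divisions and remainders only have such forms for their second operand.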
1956bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1957 switch (Opcode) {
1958 case Instruction::Add:
1959 case Instruction::Sub:
1960 case Instruction::Mul:
1961 case Instruction::And:
1962 case Instruction::Or:
1963 case Instruction::Xor:
1964 case Instruction::FAdd:
1965 case Instruction::FSub:
1966 case Instruction::FMul:
1967 case Instruction::FDiv:
1968 case Instruction::ICmp:
1969 case Instruction::FCmp:
1970 return true;
1971 case Instruction::Shl:
1972 case Instruction::LShr:
1973 case Instruction::AShr:
1974 case Instruction::UDiv:
1975 case Instruction::SDiv:
1976 case Instruction::URem:
1977 case Instruction::SRem:
1978 return Operand == 1;
1979 default:
1980 return false;
1981 }
1982}
1983
1984
1985bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
1986 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1987 return false;
1988
1989 if (canSplatOperand(I->getOpcode(), Operand))
1990 return true;
1991
1992 auto *II = dyn_cast<IntrinsicInst>(I);
1993 if (!II)
1994 return false;
1995
1996 switch (II->getIntrinsicID()) {
1997 case Intrinsic::fma:
1998 case Intrinsic::vp_fma:
1999 return Operand == 0 || Operand == 1;
2000 case Intrinsic::vp_shl:
2001 case Intrinsic::vp_lshr:
2002 case Intrinsic::vp_ashr:
2003 case Intrinsic::vp_udiv:
2004 case Intrinsic::vp_sdiv:
2005 case Intrinsic::vp_urem:
2006 case Intrinsic::vp_srem:
2007 case Intrinsic::ssub_sat:
2008 case Intrinsic::vp_ssub_sat:
2009 case Intrinsic::usub_sat:
2010 case Intrinsic::vp_usub_sat:
2011 return Operand == 1;
2012 // These intrinsics are commutative.
2013 case Intrinsic::vp_add:
2014 case Intrinsic::vp_mul:
2015 case Intrinsic::vp_and:
2016 case Intrinsic::vp_or:
2017 case Intrinsic::vp_xor:
2018 case Intrinsic::vp_fadd:
2019 case Intrinsic::vp_fmul:
2020 case Intrinsic::vp_icmp:
2021 case Intrinsic::vp_fcmp:
2022 case Intrinsic::smin:
2023 case Intrinsic::vp_smin:
2024 case Intrinsic::umin:
2025 case Intrinsic::vp_umin:
2026 case Intrinsic::smax:
2027 case Intrinsic::vp_smax:
2028 case Intrinsic::umax:
2029 case Intrinsic::vp_umax:
2030 case Intrinsic::sadd_sat:
2031 case Intrinsic::vp_sadd_sat:
2032 case Intrinsic::uadd_sat:
2033 case Intrinsic::vp_uadd_sat:
2034 // These intrinsics have 'vr' versions.
2035 case Intrinsic::vp_sub:
2036 case Intrinsic::vp_fsub:
2037 case Intrinsic::vp_fdiv:
2038 return Operand == 0 || Operand == 1;
2039 default:
2040 return false;
2041 }
2042}
2043
2044/// Check if sinking \p I's operands to I's basic block is profitable, because
2045/// the operands can be folded into a target instruction, e.g.
2046/// splats of scalars can fold into vector instructions.
2047bool RISCVTargetLowering::shouldSinkOperands(
2048 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2049 using namespace llvm::PatternMatch;
2050
2051 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2052 return false;
2053
2054 // Don't sink splat operands if the target prefers not to. Some targets
2055 // require S2V transfer buffers and we can run out of them copying the same
2056 // value repeatedly.
2057 // FIXME: It could still be worth doing if it would improve vector register
2058 // pressure and prevent a vector spill.
2059 if (!Subtarget.sinkSplatOperands())
2060 return false;
2061
2062 for (auto OpIdx : enumerate(I->operands())) {
2063 if (!canSplatOperand(I, OpIdx.index()))
2064 continue;
2065
2066 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2067 // Make sure we are not already sinking this operand
2068 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2069 continue;
2070
2071 // We are looking for a splat that can be sunk.
2073 m_Undef(), m_ZeroMask())))
2074 continue;
2075
2076 // Don't sink i1 splats.
2077 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2078 continue;
2079
2080 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
2081 // and vector registers
2082 for (Use &U : Op->uses()) {
2083 Instruction *Insn = cast<Instruction>(U.getUser());
2084 if (!canSplatOperand(Insn, U.getOperandNo()))
2085 return false;
2086 }
2087
2088 Ops.push_back(&Op->getOperandUse(0));
2089 Ops.push_back(&OpIdx.value());
2090 }
2091 return true;
2092}
2093
2094bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2095 unsigned Opc = VecOp.getOpcode();
2096
2097 // Assume target opcodes can't be scalarized.
2098 // TODO - do we have any exceptions?
2099 if (Opc >= ISD::BUILTIN_OP_END)
2100 return false;
2101
2102 // If the vector op is not supported, try to convert to scalar.
2103 EVT VecVT = VecOp.getValueType();
2104 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2105 return true;
2106
2107 // If the vector op is supported, but the scalar op is not, the transform may
2108 // not be worthwhile.
2110 // Permit a vector binary operation to be converted to a scalar binary
2111 // operation that is custom-lowered with an illegal type.
2111 EVT ScalarVT = VecVT.getScalarType();
2112 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2113 isOperationCustom(Opc, ScalarVT);
2114}
2115
2116bool RISCVTargetLowering::isOffsetFoldingLegal(
2117 const GlobalAddressSDNode *GA) const {
2118 // In order to maximise the opportunity for common subexpression elimination,
2119 // keep a separate ADD node for the global address offset instead of folding
2120 // it in the global address node. Later peephole optimisations may choose to
2121 // fold it back in when profitable.
2122 return false;
2123}
2124
2125// Return one of the followings:
2126// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2127// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2128// positive counterpart, which will be materialized from the first returned
2129 // element. The second returned element indicates that an FNEG should be
2130 // emitted afterwards.
2131// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2132std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2133 EVT VT) const {
2134 if (!Subtarget.hasStdExtZfa())
2135 return std::make_pair(-1, false);
2136
2137 bool IsSupportedVT = false;
2138 if (VT == MVT::f16) {
2139 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2140 } else if (VT == MVT::f32) {
2141 IsSupportedVT = true;
2142 } else if (VT == MVT::f64) {
2143 assert(Subtarget.hasStdExtD() && "Expect D extension");
2144 IsSupportedVT = true;
2145 }
2146
2147 if (!IsSupportedVT)
2148 return std::make_pair(-1, false);
2149
2150 int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
2151 if (Index < 0 && Imm.isNegative())
2152 // Try the combination of its positive counterpart + FNEG.
2153 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2154 else
2155 return std::make_pair(Index, false);
2156}
2157
2158bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2159 bool ForCodeSize) const {
2160 bool IsLegalVT = false;
2161 if (VT == MVT::f16)
2162 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2163 else if (VT == MVT::f32)
2164 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2165 else if (VT == MVT::f64)
2166 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2167 else if (VT == MVT::bf16)
2168 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2169
2170 if (!IsLegalVT)
2171 return false;
2172
2173 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2174 return true;
2175
2176 // Cannot create a 64 bit floating-point immediate value for rv32.
2177 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2178 // td can handle +0.0 or -0.0 already.
2179 // -0.0 can be created by fmv + fneg.
2180 return Imm.isZero();
2181 }
2182
2183 // Special case: fmv + fneg
2184 if (Imm.isNegZero())
2185 return true;
2186
2187 // Building an integer and then converting requires a fmv at the end of
2188 // the integer sequence.
2189 const int Cost =
2190 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2191 Subtarget);
2192 return Cost <= FPImmCost;
2193}
2194
2195// TODO: This is very conservative.
2196bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2197 unsigned Index) const {
2199 return false;
2200
2201 // Only support extracting a fixed from a fixed vector for now.
2202 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2203 return false;
2204
2205 EVT EltVT = ResVT.getVectorElementType();
2206 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2207
2208 // The smallest type we can slide is i8.
2209 // TODO: We can extract index 0 from a mask vector without a slide.
2210 if (EltVT == MVT::i1)
2211 return false;
2212
2213 unsigned ResElts = ResVT.getVectorNumElements();
2214 unsigned SrcElts = SrcVT.getVectorNumElements();
2215
2216 unsigned MinVLen = Subtarget.getRealMinVLen();
2217 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2218
2219 // If we're extracting only data from the first VLEN bits of the source
2220 // then we can always do this with an m1 vslidedown.vx. Restricting the
2221 // Index ensures we can use a vslidedown.vi.
2222 // TODO: We can generalize this when the exact VLEN is known.
2223 if (Index + ResElts <= MinVLMAX && Index < 31)
2224 return true;
2225
2226 // Conservatively only handle extracting half of a vector.
2227 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2228 // a cheap extract. However, this case is important in practice for
2229 // shuffled extracts of longer vectors. How should this be resolved?
2230 if ((ResElts * 2) != SrcElts)
2231 return false;
2232
2233 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2234 // cheap.
2235 if (Index >= 32)
2236 return false;
2237
2238 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2239 // the upper half of a vector until we have more test coverage.
2240 return Index == 0 || Index == ResElts;
2241}
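
// Worked example for the rules above (illustrative, assuming VLEN >= 128, so
// MinVLMAX for i32 elements is 128/32 = 4): extracting v2i32 from v8i32 at
// index 0 or 2 lies entirely within the first VLEN bits and is cheap, and
// extracting the upper half v4i32 at index 4 is cheap as well, but extracting
// v2i32 at index 4 is rejected because it is neither within the first VLEN
// bits nor a half-vector extract.
static_assert(128 / 32 == 4, "MinVLMAX for i32 elements at VLEN=128");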
2242
2243MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2244 CallingConv::ID CC,
2245 EVT VT) const {
2246 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2247 // We might still end up using a GPR but that will be decided based on ABI.
2248 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2249 !Subtarget.hasStdExtZfhminOrZhinxmin())
2250 return MVT::f32;
2251
2252 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2253
2254 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2255 return MVT::i64;
2256
2257 return PartVT;
2258}
2259
2260unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2261 CallingConv::ID CC,
2262 EVT VT) const {
2263 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2264 // We might still end up using a GPR but that will be decided based on ABI.
2265 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2266 !Subtarget.hasStdExtZfhminOrZhinxmin())
2267 return 1;
2268
2269 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2270}
2271
2272unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2273 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2274 unsigned &NumIntermediates, MVT &RegisterVT) const {
2275 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2276 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2277
2278 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2279 IntermediateVT = MVT::i64;
2280
2281 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2282 RegisterVT = MVT::i64;
2283
2284 return NumRegs;
2285}
2286
2287// Changes the condition code and swaps operands if necessary, so the SetCC
2288// operation matches one of the comparisons supported directly by branches
2289// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2290// with 1/-1.
2291static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2292 ISD::CondCode &CC, SelectionDAG &DAG) {
2293 // If this is a single bit test that can't be handled by ANDI, shift the
2294 // bit to be tested to the MSB and perform a signed compare with 0.
2295 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2296 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2297 isa<ConstantSDNode>(LHS.getOperand(1))) {
2298 uint64_t Mask = LHS.getConstantOperandVal(1);
2299 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2300 unsigned ShAmt = 0;
2301 if (isPowerOf2_64(Mask)) {
2302 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2303 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2304 } else {
2305 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2306 }
2307
2308 LHS = LHS.getOperand(0);
2309 if (ShAmt != 0)
2310 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2311 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2312 return;
2313 }
2314 }
2315
2316 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2317 int64_t C = RHSC->getSExtValue();
2318 switch (CC) {
2319 default: break;
2320 case ISD::SETGT:
2321 // Convert X > -1 to X >= 0.
2322 if (C == -1) {
2323 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2324 CC = ISD::SETGE;
2325 return;
2326 }
2327 break;
2328 case ISD::SETLT:
2329 // Convert X < 1 to 0 >= X.
2330 if (C == 1) {
2331 RHS = LHS;
2332 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2333 CC = ISD::SETGE;
2334 return;
2335 }
2336 break;
2337 }
2338 }
2339
2340 switch (CC) {
2341 default:
2342 break;
2343 case ISD::SETGT:
2344 case ISD::SETLE:
2345 case ISD::SETUGT:
2346 case ISD::SETULE:
2347 CC = ISD::getSetCCSwappedOperands(CC);
2348 std::swap(LHS, RHS);
2349 break;
2350 }
2351}
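
// Worked example of the single-bit test above (illustrative): on RV64,
// "(X & (1 << 22)) == 0" cannot be lowered with andi because the mask does
// not fit in a signed 12-bit immediate. Bit 22 is shifted up to the sign bit
// (ShAmt = 63 - 22 = 41) and the branch becomes "(X << 41) >= 0", which maps
// directly onto bgez.
static_assert((1 << 22) > 2047 && 63 - 22 == 41,
              "mask and shift amount used in the example above");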
2352
2353RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2354 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2355 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2356 if (VT.getVectorElementType() == MVT::i1)
2357 KnownSize *= 8;
2358
2359 switch (KnownSize) {
2360 default:
2361 llvm_unreachable("Invalid LMUL.");
2362 case 8:
2363 return RISCVII::VLMUL::LMUL_F8;
2364 case 16:
2365 return RISCVII::VLMUL::LMUL_F4;
2366 case 32:
2367 return RISCVII::VLMUL::LMUL_F2;
2368 case 64:
2369 return RISCVII::VLMUL::LMUL_1;
2370 case 128:
2371 return RISCVII::VLMUL::LMUL_2;
2372 case 256:
2373 return RISCVII::VLMUL::LMUL_4;
2374 case 512:
2375 return RISCVII::VLMUL::LMUL_8;
2376 }
2377}
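
// Worked example for the mapping above (illustrative): with RVVBitsPerBlock
// being 64, nxv4i32 has a known minimum size of 128 bits and maps to LMUL_2,
// while nxv1i8 (8 bits) maps to the smallest fractional LMUL. Mask types are
// scaled by 8 first, so nxv8i1 is classified like nxv8i8.
static_assert(4 * 32 == 2 * 64,
              "nxv4i32 spans two 64-bit vector blocks, i.e. LMUL=2");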
2378
2379unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2380 switch (LMul) {
2381 default:
2382 llvm_unreachable("Invalid LMUL.");
2383 case RISCVII::VLMUL::LMUL_F8:
2384 case RISCVII::VLMUL::LMUL_F4:
2385 case RISCVII::VLMUL::LMUL_F2:
2386 case RISCVII::VLMUL::LMUL_1:
2387 return RISCV::VRRegClassID;
2388 case RISCVII::VLMUL::LMUL_2:
2389 return RISCV::VRM2RegClassID;
2390 case RISCVII::VLMUL::LMUL_4:
2391 return RISCV::VRM4RegClassID;
2392 case RISCVII::VLMUL::LMUL_8:
2393 return RISCV::VRM8RegClassID;
2394 }
2395}
2396
2397unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2398 RISCVII::VLMUL LMUL = getLMUL(VT);
2399 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2400 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2401 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2402 LMUL == RISCVII::VLMUL::LMUL_1) {
2403 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2404 "Unexpected subreg numbering");
2405 return RISCV::sub_vrm1_0 + Index;
2406 }
2407 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2408 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2409 "Unexpected subreg numbering");
2410 return RISCV::sub_vrm2_0 + Index;
2411 }
2412 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2413 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2414 "Unexpected subreg numbering");
2415 return RISCV::sub_vrm4_0 + Index;
2416 }
2417 llvm_unreachable("Invalid vector type.");
2418}
2419
2420unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2421 if (VT.getVectorElementType() == MVT::i1)
2422 return RISCV::VRRegClassID;
2423 return getRegClassIDForLMUL(getLMUL(VT));
2424}
2425
2426// Attempt to decompose a subvector insert/extract between VecVT and
2427// SubVecVT via subregister indices. Returns the subregister index that
2428// can perform the subvector insert/extract with the given element index, as
2429// well as the index corresponding to any leftover subvectors that must be
2430// further inserted/extracted within the register class for SubVecVT.
2431std::pair<unsigned, unsigned>
2433 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2434 const RISCVRegisterInfo *TRI) {
2435 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2436 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2437 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2438 "Register classes not ordered");
2439 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2440 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2441 // Try to compose a subregister index that takes us from the incoming
2442 // LMUL>1 register class down to the outgoing one. At each step we half
2443 // the LMUL:
2444 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2445 // Note that this is not guaranteed to find a subregister index, such as
2446 // when we are extracting from one VR type to another.
2447 unsigned SubRegIdx = RISCV::NoSubRegister;
2448 for (const unsigned RCID :
2449 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2450 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2451 VecVT = VecVT.getHalfNumVectorElementsVT();
2452 bool IsHi =
2453 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2454 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2455 getSubregIndexByMVT(VecVT, IsHi));
2456 if (IsHi)
2457 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2458 }
2459 return {SubRegIdx, InsertExtractIdx};
2460}
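
// Walking through the example in the comment above (illustrative): extracting
// nxv2i32 at element 12 from nxv16i32 halves the vector type three times,
// 16 -> 8 -> 4 -> 2 elements. The index falls into the high half at the first
// two steps (12 >= 8, then 4 >= 4) and into the low half at the last one, so
// the composed subregister is sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 and
// the leftover element index is 0.
static_assert(12 - 8 - 4 == 0, "leftover element index in the example above");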
2461
2462// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2463// stores for those types.
2464bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2465 return !Subtarget.useRVVForFixedLengthVectors() ||
2466 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2467}
2468
2469bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2470 if (!ScalarTy.isSimple())
2471 return false;
2472 switch (ScalarTy.getSimpleVT().SimpleTy) {
2473 case MVT::iPTR:
2474 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2475 case MVT::i8:
2476 case MVT::i16:
2477 case MVT::i32:
2478 return true;
2479 case MVT::i64:
2480 return Subtarget.hasVInstructionsI64();
2481 case MVT::f16:
2482 return Subtarget.hasVInstructionsF16();
2483 case MVT::f32:
2484 return Subtarget.hasVInstructionsF32();
2485 case MVT::f64:
2486 return Subtarget.hasVInstructionsF64();
2487 default:
2488 return false;
2489 }
2490}
2491
2492
2493unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2494 return NumRepeatedDivisors;
2495}
2496
2498 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2499 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2500 "Unexpected opcode");
2501 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2502 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2503 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2504 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2505 if (!II)
2506 return SDValue();
2507 return Op.getOperand(II->VLOperand + 1 + HasChain);
2508}
2509
2510static bool useRVVForFixedLengthVectorVT(MVT VT,
2511 const RISCVSubtarget &Subtarget) {
2512 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2513 if (!Subtarget.useRVVForFixedLengthVectors())
2514 return false;
2515
2516 // We only support a set of vector types with a consistent maximum fixed size
2517 // across all supported vector element types to avoid legalization issues.
2518 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2519 // fixed-length vector type we support is 1024 bytes.
2520 if (VT.getFixedSizeInBits() > 1024 * 8)
2521 return false;
2522
2523 unsigned MinVLen = Subtarget.getRealMinVLen();
2524
2525 MVT EltVT = VT.getVectorElementType();
2526
2527 // Don't use RVV for vectors we cannot scalarize if required.
2528 switch (EltVT.SimpleTy) {
2529 // i1 is supported but has different rules.
2530 default:
2531 return false;
2532 case MVT::i1:
2533 // Masks can only use a single register.
2534 if (VT.getVectorNumElements() > MinVLen)
2535 return false;
2536 MinVLen /= 8;
2537 break;
2538 case MVT::i8:
2539 case MVT::i16:
2540 case MVT::i32:
2541 break;
2542 case MVT::i64:
2543 if (!Subtarget.hasVInstructionsI64())
2544 return false;
2545 break;
2546 case MVT::f16:
2547 if (!Subtarget.hasVInstructionsF16Minimal())
2548 return false;
2549 break;
2550 case MVT::f32:
2551 if (!Subtarget.hasVInstructionsF32())
2552 return false;
2553 break;
2554 case MVT::f64:
2555 if (!Subtarget.hasVInstructionsF64())
2556 return false;
2557 break;
2558 }
2559
2560 // Reject elements larger than ELEN.
2561 if (EltVT.getSizeInBits() > Subtarget.getELen())
2562 return false;
2563
2564 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2565 // Don't use RVV for types that don't fit.
2566 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2567 return false;
2568
2569 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2570 // the base fixed length RVV support in place.
2571 if (!VT.isPow2VectorType())
2572 return false;
2573
2574 return true;
2575}
2576
2577bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2578 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2579}
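
// Worked example for the checks above (illustrative, assuming VLEN >= 128 and
// the default limit of LMUL <= 8 for fixed-length vectors): v8i32 is 256 bits
// and needs ceil(256/128) = 2 vector registers, so it is lowered with RVV,
// whereas v64i32 would need 16 registers and is left to generic legalization.
static_assert((256 + 127) / 128 == 2 && (2048 + 127) / 128 == 16,
              "register counts used in the example above");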
2580
2581// Return the largest legal scalable vector type that matches VT's element type.
2582static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2583 const RISCVSubtarget &Subtarget) {
2584 // This may be called before legal types are setup.
2585 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2586 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2587 "Expected legal fixed length vector!");
2588
2589 unsigned MinVLen = Subtarget.getRealMinVLen();
2590 unsigned MaxELen = Subtarget.getELen();
2591
2592 MVT EltVT = VT.getVectorElementType();
2593 switch (EltVT.SimpleTy) {
2594 default:
2595 llvm_unreachable("unexpected element type for RVV container");
2596 case MVT::i1:
2597 case MVT::i8:
2598 case MVT::i16:
2599 case MVT::i32:
2600 case MVT::i64:
2601 case MVT::f16:
2602 case MVT::f32:
2603 case MVT::f64: {
2604 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2605 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2606 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2607 unsigned NumElts =
2609 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2610 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2611 return MVT::getScalableVectorVT(EltVT, NumElts);
2612 }
2613 }
2614}
2615
2617 const RISCVSubtarget &Subtarget) {
2619 Subtarget);
2620}
2621
2622MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2623 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2624}
2625
2626// Grow V to consume an entire RVV register.
2627static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2628 const RISCVSubtarget &Subtarget) {
2629 assert(VT.isScalableVector() &&
2630 "Expected to convert into a scalable vector!");
2631 assert(V.getValueType().isFixedLengthVector() &&
2632 "Expected a fixed length vector operand!");
2633 SDLoc DL(V);
2634 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2635 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2636}
2637
2638// Shrink V so it's just big enough to maintain a VT's worth of data.
2639static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2640 const RISCVSubtarget &Subtarget) {
2641 assert(VT.isFixedLengthVector() &&
2642 "Expected to convert into a fixed length vector!");
2643 assert(V.getValueType().isScalableVector() &&
2644 "Expected a scalable vector operand!");
2645 SDLoc DL(V);
2646 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2647 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2648}
2649
2650/// Return the mask type suitable for masking the provided
2651/// vector type. This is simply an i1 element type vector of the same
2652/// (possibly scalable) length.
2653static MVT getMaskTypeFor(MVT VecVT) {
2654 assert(VecVT.isVector());
2655 ElementCount EC = VecVT.getVectorElementCount();
2656 return MVT::getVectorVT(MVT::i1, EC);
2657}
2658
2659/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2660/// vector length VL.
2661static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2662 SelectionDAG &DAG) {
2663 MVT MaskVT = getMaskTypeFor(VecVT);
2664 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2665}
2666
2667static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2668 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2669 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2670 // canonicalize the representation. InsertVSETVLI will pick the immediate
2671 // encoding later if profitable.
2672 const auto [MinVLMAX, MaxVLMAX] =
2673 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2674 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2675 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2676
2677 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2678}
2679
2680static std::pair<SDValue, SDValue>
2681getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2682 const RISCVSubtarget &Subtarget) {
2683 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2684 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2685 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2686 return {Mask, VL};
2687}
2688
2689static std::pair<SDValue, SDValue>
2690getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2691 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2692 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2693 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2694 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2695 return {Mask, VL};
2696}
2697
2698// Gets the two common "VL" operands: an all-ones mask and the vector length.
2699// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2700// the vector type that the fixed-length vector is contained in. Otherwise if
2701// VecVT is scalable, then ContainerVT should be the same as VecVT.
2702static std::pair<SDValue, SDValue>
2703getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2704 const RISCVSubtarget &Subtarget) {
2705 if (VecVT.isFixedLengthVector())
2706 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2707 Subtarget);
2708 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2709 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2710}
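
// The helpers above are used throughout this file in the following recurring
// pattern when lowering an operation on a fixed-length vector (a sketch of
// that pattern, with Opc standing in for some RISCVISD::*_VL opcode):
//   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
//   SDValue V = convertToScalableVector(ContainerVT, Op, DAG, Subtarget);
//   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
//   SDValue Res = DAG.getNode(Opc, DL, ContainerVT, V, Mask, VL);
//   return convertFromScalableVector(VT, Res, DAG, Subtarget);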
2711
2713 SelectionDAG &DAG) const {
2714 assert(VecVT.isScalableVector() && "Expected scalable vector");
2715 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2716 VecVT.getVectorElementCount());
2717}
2718
2719std::pair<unsigned, unsigned>
2720RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2721 const RISCVSubtarget &Subtarget) {
2722 assert(VecVT.isScalableVector() && "Expected scalable vector");
2723
2724 unsigned EltSize = VecVT.getScalarSizeInBits();
2725 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2726
2727 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2728 unsigned MaxVLMAX =
2729 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2730
2731 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2732 unsigned MinVLMAX =
2733 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2734
2735 return std::make_pair(MinVLMAX, MaxVLMAX);
2736}
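
// Worked example for the bounds above (illustrative): nxv4i32 has a 32-bit
// element and a known minimum size of 128 bits, i.e. LMUL=2, so with VLEN
// known to be exactly 128 both bounds are 2 * 128 / 32 = 8 elements. When
// only a minimum VLEN of 128 is known, MinVLMAX stays 8 while MaxVLMAX scales
// with the largest supported VLEN.
static_assert(2 * 128 / 32 == 8, "VLMAX = LMUL * VLEN / SEW in the example");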
2737
2738// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2739// of either is (currently) supported. This can get us into an infinite loop
2740// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2741// as a ..., etc.
2742// Until either (or both) of these can reliably lower any node, reporting that
2743// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2744// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2745// which is not desirable.
2746bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2747 EVT VT, unsigned DefinedValues) const {
2748 return false;
2749}
2750
2752 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2753 // implementation-defined.
2754 if (!VT.isVector())
2756 unsigned DLenFactor = Subtarget.getDLenFactor();
2757 unsigned Cost;
2758 if (VT.isScalableVector()) {
2759 unsigned LMul;
2760 bool Fractional;
2761 std::tie(LMul, Fractional) =
2762 RISCVVType::decodeVLMUL(getLMUL(VT));
2763 if (Fractional)
2764 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2765 else
2766 Cost = (LMul * DLenFactor);
2767 } else {
2768 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2769 }
2770 return Cost;
2771}
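
// Worked example for the cost model above (illustrative, assuming VLEN=128
// and DLEN=VLEN, i.e. DLenFactor=1): a scalable nxv8i32 operand is LMUL=4 and
// gets cost 4, while a fixed v8i32 operand is 256 bits and gets
// ceil(256 / 128) = 2. Halving DLEN doubles both numbers.
static_assert((256 + 127) / 128 == 2, "fixed-vector cost in the example above");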
2772
2773
2774/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2775/// is generally quadratic in the number of vregs implied by LMUL. Note that
2776/// the operands (index and possibly mask) are handled separately.
2778 return getLMULCost(VT) * getLMULCost(VT);
2779}
2780
2781/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2782/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2783/// or may track the vrgather.vv cost. It is implementation-dependent.
2785 return getLMULCost(VT);
2786}
2787
2788/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2789/// for the type VT. (This does not cover the vslide1up or vslide1down
2790/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2791/// or may track the vrgather.vv cost. It is implementation-dependent.
2793 return getLMULCost(VT);
2794}
2795
2796/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2797/// for the type VT. (This does not cover the vslide1up or vslide1down
2798/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2799/// or may track the vrgather.vv cost. It is implementation-dependent.
2801 return getLMULCost(VT);
2802}
2803
2804static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2805 const RISCVSubtarget &Subtarget) {
2806 // RISC-V FP-to-int conversions saturate to the destination register size, but
2807 // don't produce 0 for nan. We can use a conversion instruction and fix the
2808 // nan case with a compare and a select.
2809 SDValue Src = Op.getOperand(0);
2810
2811 MVT DstVT = Op.getSimpleValueType();
2812 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2813
2814 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2815
2816 if (!DstVT.isVector()) {
2817 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2818 // the result.
2819 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2820 Src.getValueType() == MVT::bf16) {
2821 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2822 }
2823
2824 unsigned Opc;
2825 if (SatVT == DstVT)
2826 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2827 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2828 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2829 else
2830 return SDValue();
2831 // FIXME: Support other SatVTs by clamping before or after the conversion.
2832
2833 SDLoc DL(Op);
2834 SDValue FpToInt = DAG.getNode(
2835 Opc, DL, DstVT, Src,
2836 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2837
2838 if (Opc == RISCVISD::FCVT_WU_RV64)
2839 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2840
2841 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2842 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2843 ISD::CondCode::SETUO);
2844 }
2845
2846 // Vectors.
2847
2848 MVT DstEltVT = DstVT.getVectorElementType();
2849 MVT SrcVT = Src.getSimpleValueType();
2850 MVT SrcEltVT = SrcVT.getVectorElementType();
2851 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2852 unsigned DstEltSize = DstEltVT.getSizeInBits();
2853
2854 // Only handle saturating to the destination type.
2855 if (SatVT != DstEltVT)
2856 return SDValue();
2857
2858 // FIXME: Don't support narrowing by more than 1 step for now.
2859 if (SrcEltSize > (2 * DstEltSize))
2860 return SDValue();
2861
2862 MVT DstContainerVT = DstVT;
2863 MVT SrcContainerVT = SrcVT;
2864 if (DstVT.isFixedLengthVector()) {
2865 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2866 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2867 assert(DstContainerVT.getVectorElementCount() ==
2868 SrcContainerVT.getVectorElementCount() &&
2869 "Expected same element count");
2870 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2871 }
2872
2873 SDLoc DL(Op);
2874
2875 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2876
2877 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2878 {Src, Src, DAG.getCondCode(ISD::SETNE),
2879 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2880
2881 // Need to widen by more than 1 step, promote the FP type, then do a widening
2882 // convert.
2883 if (DstEltSize > (2 * SrcEltSize)) {
2884 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2885 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2886 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2887 }
2888
2889 unsigned RVVOpc =
2890 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2891 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2892
2893 SDValue SplatZero = DAG.getNode(
2894 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2895 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2896 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2897 Res, DAG.getUNDEF(DstContainerVT), VL);
2898
2899 if (DstVT.isFixedLengthVector())
2900 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2901
2902 return Res;
2903}
2904
2905static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2906 switch (Opc) {
2907 case ISD::FROUNDEVEN:
2908 case ISD::STRICT_FROUNDEVEN:
2909 case ISD::VP_FROUNDEVEN:
2910 return RISCVFPRndMode::RNE;
2911 case ISD::FTRUNC:
2912 case ISD::STRICT_FTRUNC:
2913 case ISD::VP_FROUNDTOZERO:
2914 return RISCVFPRndMode::RTZ;
2915 case ISD::FFLOOR:
2916 case ISD::STRICT_FFLOOR:
2917 case ISD::VP_FFLOOR:
2918 return RISCVFPRndMode::RDN;
2919 case ISD::FCEIL:
2920 case ISD::STRICT_FCEIL:
2921 case ISD::VP_FCEIL:
2922 return RISCVFPRndMode::RUP;
2923 case ISD::FROUND:
2924 case ISD::STRICT_FROUND:
2925 case ISD::VP_FROUND:
2926 return RISCVFPRndMode::RMM;
2927 case ISD::FRINT:
2928 return RISCVFPRndMode::DYN;
2929 }
2930
2931 return RISCVFPRndMode::Invalid;
2932}
2933
2934// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2935// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2936// the integer domain and back. Taking care to avoid converting values that are
2937// nan or already correct.
2938static SDValue
2939lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2940 const RISCVSubtarget &Subtarget) {
2941 MVT VT = Op.getSimpleValueType();
2942 assert(VT.isVector() && "Unexpected type");
2943
2944 SDLoc DL(Op);
2945
2946 SDValue Src = Op.getOperand(0);
2947
2948 MVT ContainerVT = VT;
2949 if (VT.isFixedLengthVector()) {
2950 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2951 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2952 }
2953
2954 SDValue Mask, VL;
2955 if (Op->isVPOpcode()) {
2956 Mask = Op.getOperand(1);
2957 if (VT.isFixedLengthVector())
2958 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2959 Subtarget);
2960 VL = Op.getOperand(2);
2961 } else {
2962 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2963 }
2964
2965 // Freeze the source since we are increasing the number of uses.
2966 Src = DAG.getFreeze(Src);
2967
2968 // We do the conversion on the absolute value and fix the sign at the end.
2969 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2970
2971 // Determine the largest integer that can be represented exactly. This and
2972 // values larger than it don't have any fractional bits so don't need to
2973 // be converted.
2974 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2975 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2976 APFloat MaxVal = APFloat(FltSem);
2977 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2978 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2979 SDValue MaxValNode =
2980 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2981 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2982 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2983
2984 // If abs(Src) was larger than MaxVal or nan, keep it.
2985 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2986 Mask =
2987 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2988 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2989 Mask, Mask, VL});
2990
2991 // Truncate to integer and convert back to FP.
2992 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2993 MVT XLenVT = Subtarget.getXLenVT();
2994 SDValue Truncated;
2995
2996 switch (Op.getOpcode()) {
2997 default:
2998 llvm_unreachable("Unexpected opcode");
2999 case ISD::FCEIL:
3000 case ISD::VP_FCEIL:
3001 case ISD::FFLOOR:
3002 case ISD::VP_FFLOOR:
3003 case ISD::FROUND:
3004 case ISD::FROUNDEVEN:
3005 case ISD::VP_FROUND:
3006 case ISD::VP_FROUNDEVEN:
3007 case ISD::VP_FROUNDTOZERO: {
3010 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3011 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3012 break;
3013 }
3014 case ISD::FTRUNC:
3015 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3016 Mask, VL);
3017 break;
3018 case ISD::FRINT:
3019 case ISD::VP_FRINT:
3020 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3021 break;
3022 case ISD::FNEARBYINT:
3023 case ISD::VP_FNEARBYINT:
3024 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3025 Mask, VL);
3026 break;
3027 }
3028
3029 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3030 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3031 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3032 Mask, VL);
3033
3034 // Restore the original sign so that -0.0 is preserved.
3035 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3036 Src, Src, Mask, VL);
3037
3038 if (!VT.isFixedLengthVector())
3039 return Truncated;
3040
3041 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3042}
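
// Worked example for the threshold logic above (illustrative): f32 has a
// 24-bit significand, so MaxVal is 2^23 = 8388608. Lanes whose magnitude is
// at least 2^23 are already integers (and NaN lanes fail the SETOLT compare),
// so they are masked off and pass through unchanged; only the remaining lanes
// take the integer round-trip, and the final FCOPYSIGN_VL restores the sign
// so that -0.0 survives.
static_assert((1 << 23) == 8388608,
              "integer threshold for f32 used in the example above");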
3043
3044// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3045// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
3046// qNaNs and converting the new source to integer and back to FP.
3047static SDValue
3049 const RISCVSubtarget &Subtarget) {
3050 SDLoc DL(Op);
3051 MVT VT = Op.getSimpleValueType();
3052 SDValue Chain = Op.getOperand(0);
3053 SDValue Src = Op.getOperand(1);
3054
3055 MVT ContainerVT = VT;
3056 if (VT.isFixedLengthVector()) {
3057 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3058 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3059 }
3060
3061 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3062
3063 // Freeze the source since we are increasing the number of uses.
3064 Src = DAG.getFreeze(Src);
3065
3066 // Convert sNaN to qNaN by computing x + x for every unordered element x in Src.
3067 MVT MaskVT = Mask.getSimpleValueType();
3069 DAG.getVTList(MaskVT, MVT::Other),
3070 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3071 DAG.getUNDEF(MaskVT), Mask, VL});
3072 Chain = Unorder.getValue(1);
3074 DAG.getVTList(ContainerVT, MVT::Other),
3075 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3076 Chain = Src.getValue(1);
3077
3078 // We do the conversion on the absolute value and fix the sign at the end.
3079 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3080
3081 // Determine the largest integer that can be represented exactly. This and
3082 // values larger than it don't have any fractional bits so don't need to
3083 // be converted.
3084 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3085 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3086 APFloat MaxVal = APFloat(FltSem);
3087 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3088 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3089 SDValue MaxValNode =
3090 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3091 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3092 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3093
3094 // If abs(Src) was larger than MaxVal or nan, keep it.
3095 Mask = DAG.getNode(
3096 RISCVISD::SETCC_VL, DL, MaskVT,
3097 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3098
3099 // Truncate to integer and convert back to FP.
3100 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3101 MVT XLenVT = Subtarget.getXLenVT();
3102 SDValue Truncated;
3103
3104 switch (Op.getOpcode()) {
3105 default:
3106 llvm_unreachable("Unexpected opcode");
3107 case ISD::STRICT_FCEIL:
3108 case ISD::STRICT_FFLOOR:
3109 case ISD::STRICT_FROUND:
3113 Truncated = DAG.getNode(
3114 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3115 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3116 break;
3117 }
3118 case ISD::STRICT_FTRUNC:
3119 Truncated =
3121 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3122 break;
3125 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3126 Mask, VL);
3127 break;
3128 }
3129 Chain = Truncated.getValue(1);
3130
3131 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3132 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3133 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3134 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3135 Truncated, Mask, VL);
3136 Chain = Truncated.getValue(1);
3137 }
3138
3139 // Restore the original sign so that -0.0 is preserved.
3140 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3141 Src, Src, Mask, VL);
3142
3143 if (VT.isFixedLengthVector())
3144 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3145 return DAG.getMergeValues({Truncated, Chain}, DL);
3146}
3147
3148static SDValue
3150 const RISCVSubtarget &Subtarget) {
3151 MVT VT = Op.getSimpleValueType();
3152 if (VT.isVector())
3153 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3154
3155 if (DAG.shouldOptForSize())
3156 return SDValue();
3157
3158 SDLoc DL(Op);
3159 SDValue Src = Op.getOperand(0);
3160
3161 // Create an integer the size of the mantissa with the MSB set. This and all
3162 // values larger than it don't have any fractional bits so don't need to be
3163 // converted.
3164 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3165 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3166 APFloat MaxVal = APFloat(FltSem);
3167 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3168 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3169 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3170
3171 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3172 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3173 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3174}
3175
3176// Expand vector LRINT and LLRINT by converting to the integer domain.
3178 const RISCVSubtarget &Subtarget) {
3179 MVT VT = Op.getSimpleValueType();
3180 assert(VT.isVector() && "Unexpected type");
3181
3182 SDLoc DL(Op);
3183 SDValue Src = Op.getOperand(0);
3184 MVT ContainerVT = VT;
3185
3186 if (VT.isFixedLengthVector()) {
3187 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3188 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3189 }
3190
3191 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3192 SDValue Truncated =
3193 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3194
3195 if (!VT.isFixedLengthVector())
3196 return Truncated;
3197
3198 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3199}
3200
3201static SDValue
3203 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3204 SDValue Offset, SDValue Mask, SDValue VL,
3206 if (Merge.isUndef())
3208 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3209 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3210 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3211}
3212
3213static SDValue
3214getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3216 SDValue VL,
3218 if (Merge.isUndef())
3220 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3221 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3222 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3223}
3224
3225static MVT getLMUL1VT(MVT VT) {
3227 "Unexpected vector MVT");
3231}
3232
3233struct VIDSequence {
3234 int64_t StepNumerator;
3235 unsigned StepDenominator;
3236 int64_t Addend;
3237};
3238
3239static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3241 // We will use a SINT_TO_FP to materialize this constant so we should use a
3242 // signed APSInt here.
3243 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3244 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3245 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3246 // the rounding mode changes the output value, then it is not an exact
3247 // integer.
3249 bool IsExact;
3250 // If it is out of signed integer range, it will return an invalid operation.
3251 // If it is not an exact integer, IsExact is false.
3252 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3253 APFloatBase::opInvalidOp) ||
3254 !IsExact)
3255 return std::nullopt;
3256 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3257}
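
// Examples for the helper above (illustrative): an element of 3.0 with a
// BitWidth of 32 converts exactly and yields 3, and -2.0 yields the low 32
// bits of -2, while 2.5 is inexact under every rounding mode and a value such
// as 1e20 overflows the signed 32-bit range, so both of those return
// std::nullopt.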
3258
3259// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3260// to the (non-zero) step S and start value X. This can be then lowered as the
3261// RVV sequence (VID * S) + X, for example.
3262// The step S is represented as an integer numerator divided by a positive
3263// denominator. Note that the implementation currently only identifies
3264// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3265// cannot detect 2/3, for example.
3266// Note that this method will also match potentially unappealing index
3267// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3268// determine whether this is worth generating code for.
3269static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3270 unsigned EltSizeInBits) {
3271 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3272 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3273 return std::nullopt;
3274 bool IsInteger = Op.getValueType().isInteger();
3275
3276 std::optional<unsigned> SeqStepDenom;
3277 std::optional<int64_t> SeqStepNum, SeqAddend;
3278 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3279 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3280
3281 // First extract the ops into a list of constant integer values. This may not
3282 // be possible for floats if they're not all representable as integers.
3284 const unsigned OpSize = Op.getScalarValueSizeInBits();
3285 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3286 if (Elt.isUndef()) {
3287 Elts[Idx] = std::nullopt;
3288 continue;
3289 }
3290 if (IsInteger) {
3291 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3292 } else {
3293 auto ExactInteger =
3294 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3295 if (!ExactInteger)
3296 return std::nullopt;
3297 Elts[Idx] = *ExactInteger;
3298 }
3299 }
3300
3301 for (auto [Idx, Elt] : enumerate(Elts)) {
3302 // Assume undef elements match the sequence; we just have to be careful
3303 // when interpolating across them.
3304 if (!Elt)
3305 continue;
3306
3307 if (PrevElt) {
3308 // Calculate the step since the last non-undef element, and ensure
3309 // it's consistent across the entire sequence.
3310 unsigned IdxDiff = Idx - PrevElt->second;
3311 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3312
3313 // A zero value difference means that we're somewhere in the middle
3314 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3315 // step change before evaluating the sequence.
3316 if (ValDiff == 0)
3317 continue;
3318
3319 int64_t Remainder = ValDiff % IdxDiff;
3320 // Normalize the step if it's greater than 1.
3321 if (Remainder != ValDiff) {
3322 // The difference must cleanly divide the element span.
3323 if (Remainder != 0)
3324 return std::nullopt;
3325 ValDiff /= IdxDiff;
3326 IdxDiff = 1;
3327 }
3328
3329 if (!SeqStepNum)
3330 SeqStepNum = ValDiff;
3331 else if (ValDiff != SeqStepNum)
3332 return std::nullopt;
3333
3334 if (!SeqStepDenom)
3335 SeqStepDenom = IdxDiff;
3336 else if (IdxDiff != *SeqStepDenom)
3337 return std::nullopt;
3338 }
3339
3340 // Record this non-undef element for later.
3341 if (!PrevElt || PrevElt->first != *Elt)
3342 PrevElt = std::make_pair(*Elt, Idx);
3343 }
3344
3345 // We need to have logged a step for this to count as a legal index sequence.
3346 if (!SeqStepNum || !SeqStepDenom)
3347 return std::nullopt;
3348
3349 // Loop back through the sequence and validate elements we might have skipped
3350 // while waiting for a valid step. While doing this, log any sequence addend.
3351 for (auto [Idx, Elt] : enumerate(Elts)) {
3352 if (!Elt)
3353 continue;
3354 uint64_t ExpectedVal =
3355 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3356 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3357 if (!SeqAddend)
3358 SeqAddend = Addend;
3359 else if (Addend != SeqAddend)
3360 return std::nullopt;
3361 }
3362
3363 assert(SeqAddend && "Must have an addend if we have a step");
3364
3365 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3366}
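
// Examples for the matcher above (illustrative): <0, 2, 4, 6> is matched as
// {StepNumerator=2, StepDenominator=1, Addend=0}; <5, 5, 6, 6> is matched as
// {1, 2, 5}, since the value grows by one every two elements; <0, 1, 3> is
// rejected because no single step fits every pair of elements.
static_assert(5 + (3 * 1) / 2 == 6,
              "last element of <5,5,6,6> reproduced from the matched "
              "{numerator=1, denominator=2, addend=5} sequence");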
3367
3368// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3369// and lower it as a VRGATHER_VX_VL from the source vector.
3370static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3371 SelectionDAG &DAG,
3372 const RISCVSubtarget &Subtarget) {
3373 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3374 return SDValue();
3375 SDValue Vec = SplatVal.getOperand(0);
3376 // Only perform this optimization on vectors of the same size for simplicity.
3377 // Don't perform this optimization for i1 vectors.
3378 // FIXME: Support i1 vectors, maybe by promoting to i8?
3379 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3380 return SDValue();
3381 SDValue Idx = SplatVal.getOperand(1);
3382 // The index must be a legal type.
3383 if (Idx.getValueType() != Subtarget.getXLenVT())
3384 return SDValue();
3385
3386 MVT ContainerVT = VT;
3387 if (VT.isFixedLengthVector()) {
3388 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3389 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3390 }
3391
3392 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3393
3394 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3395 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3396
3397 if (!VT.isFixedLengthVector())
3398 return Gather;
3399
3400 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3401}
3402
3403
3404/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3405/// which constitute a large proportion of the elements. In such cases we can
3406/// splat a vector with the dominant element and make up the shortfall with
3407/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3408/// Note that this includes vectors of 2 elements by association. The
3409/// upper-most element is the "dominant" one, allowing us to use a splat to
3410/// "insert" the upper element, and an insert of the lower element at position
3411/// 0, which improves codegen.
3412static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3413 const RISCVSubtarget &Subtarget) {
3414 MVT VT = Op.getSimpleValueType();
3415 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3416
3417 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3418
3419 SDLoc DL(Op);
3420 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3421
3422 MVT XLenVT = Subtarget.getXLenVT();
3423 unsigned NumElts = Op.getNumOperands();
3424
3425 SDValue DominantValue;
3426 unsigned MostCommonCount = 0;
3427 DenseMap<SDValue, unsigned> ValueCounts;
3428 unsigned NumUndefElts =
3429 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3430
3431 // Track the number of scalar loads we know we'd be inserting, estimated as
3432 // any non-zero floating-point constant. Other kinds of element are either
3433 // already in registers or are materialized on demand. The threshold at which
3434 // a vector load is more desirable than several scalar materialization and
3435 // vector-insertion instructions is not known.
3436 unsigned NumScalarLoads = 0;
3437
3438 for (SDValue V : Op->op_values()) {
3439 if (V.isUndef())
3440 continue;
3441
3442 ValueCounts.insert(std::make_pair(V, 0));
3443 unsigned &Count = ValueCounts[V];
3444 if (0 == Count)
3445 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3446 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3447
3448 // Is this value dominant? In case of a tie, prefer the highest element as
3449 // it's cheaper to insert near the beginning of a vector than it is at the
3450 // end.
3451 if (++Count >= MostCommonCount) {
3452 DominantValue = V;
3453 MostCommonCount = Count;
3454 }
3455 }
3456
3457 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3458 unsigned NumDefElts = NumElts - NumUndefElts;
3459 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3460
3461 // Don't perform this optimization when optimizing for size, since
3462 // materializing elements and inserting them tends to cause code bloat.
3463 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3464 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3465 ((MostCommonCount > DominantValueCountThreshold) ||
3466 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3467 // Start by splatting the most common element.
3468 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3469
3470 DenseSet<SDValue> Processed{DominantValue};
3471
3472 // We can handle an insert into the last element (of a splat) via
3473 // v(f)slide1down. This is slightly better than the vslideup insert
3474 // lowering as it avoids the need for a vector group temporary. It
3475 // is also better than using vmerge.vx as it avoids the need to
3476 // materialize the mask in a vector register.
3477 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3478 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3479 LastOp != DominantValue) {
3480 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3481 auto OpCode =
3482 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3483 if (!VT.isFloatingPoint())
3484 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3485 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3486 LastOp, Mask, VL);
3487 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3488 Processed.insert(LastOp);
3489 }
3490
3491 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3492 for (const auto &OpIdx : enumerate(Op->ops())) {
3493 const SDValue &V = OpIdx.value();
3494 if (V.isUndef() || !Processed.insert(V).second)
3495 continue;
3496 if (ValueCounts[V] == 1) {
3497 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3498 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3499 } else {
3500 // Blend in all instances of this value using a VSELECT, using a
3501 // mask where each bit signals whether that element is the one
3502 // we're after.
3503 SmallVector<SDValue> Ops;
3504 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3505 return DAG.getConstant(V == V1, DL, XLenVT);
3506 });
3507 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3508 DAG.getBuildVector(SelMaskTy, DL, Ops),
3509 DAG.getSplatBuildVector(VT, DL, V), Vec);
3510 }
3511 }
3512
3513 return Vec;
3514 }
3515
3516 return SDValue();
3517}
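
// Worked example for the heuristic above (illustrative): for the constant
// vector v8i32 <1, 1, 1, 1, 1, 1, 7, undef> there are 7 defined elements, so
// the dominant-value threshold is 7 - 2 = 5. The value 1 occurs six times and
// clears the threshold, so the vector is built as a splat of 1 plus a single
// insert of 7 at index 6 rather than eight scalar inserts.
static_assert(7 - 2 == 5 && 6 > 5,
              "threshold and dominant count in the example above");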
3518
3519static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3520 const RISCVSubtarget &Subtarget) {
3521 MVT VT = Op.getSimpleValueType();
3522 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3523
3524 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3525
3526 SDLoc DL(Op);
3527 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3528
3529 MVT XLenVT = Subtarget.getXLenVT();
3530 unsigned NumElts = Op.getNumOperands();
3531
3532 if (VT.getVectorElementType() == MVT::i1) {
3533 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3534 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3535 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3536 }
3537
3538 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3539 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3540 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3541 }
3542
3543 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3544 // scalar integer chunks whose bit-width depends on the number of mask
3545 // bits and XLEN.
3546 // First, determine the most appropriate scalar integer type to use. This
3547 // is at most XLenVT, but may be shrunk to a smaller vector element type
3548 // according to the size of the final vector - use i8 chunks rather than
3549 // XLenVT if we're producing a v8i1. This results in more consistent
3550 // codegen across RV32 and RV64.
3551 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3552 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3553 // If we have to use more than one INSERT_VECTOR_ELT then this
3554 // optimization is likely to increase code size; avoid performing it in
3555 // such a case. We can use a load from a constant pool in this case.
3556 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3557 return SDValue();
3558 // Now we can create our integer vector type. Note that it may be larger
3559 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3560 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3561 MVT IntegerViaVecVT =
3562 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3563 IntegerViaVecElts);
3564
3565 uint64_t Bits = 0;
3566 unsigned BitPos = 0, IntegerEltIdx = 0;
3567 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3568
3569 for (unsigned I = 0; I < NumElts;) {
3570 SDValue V = Op.getOperand(I);
3571 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3572 Bits |= ((uint64_t)BitValue << BitPos);
3573 ++BitPos;
3574 ++I;
3575
3576 // Once we accumulate enough bits to fill our scalar type or process the
3577 // last element, insert into our vector and clear our accumulated data.
3578 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3579 if (NumViaIntegerBits <= 32)
3580 Bits = SignExtend64<32>(Bits);
3581 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3582 Elts[IntegerEltIdx] = Elt;
3583 Bits = 0;
3584 BitPos = 0;
3585 IntegerEltIdx++;
3586 }
3587 }
3588
3589 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3590
3591 if (NumElts < NumViaIntegerBits) {
3592 // If we're producing a smaller vector than our minimum legal integer
3593 // type, bitcast to the equivalent (known-legal) mask type, and extract
3594 // our final mask.
3595 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3596 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3597 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3598 DAG.getConstant(0, DL, XLenVT));
3599 } else {
3600 // Else we must have produced an integer type with the same size as the
3601 // mask type; bitcast for the final result.
3602 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3603 Vec = DAG.getBitcast(VT, Vec);
3604 }
3605
3606 return Vec;
3607 }
3608
3609 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3610 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3611 : RISCVISD::VMV_V_X_VL;
3612 if (!VT.isFloatingPoint())
3613 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3614 Splat =
3615 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3616 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3617 }
3618
3619 // Try and match index sequences, which we can lower to the vid instruction
3620 // with optional modifications. An all-undef vector is matched by
3621 // getSplatValue, above.
3622 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3623 int64_t StepNumerator = SimpleVID->StepNumerator;
3624 unsigned StepDenominator = SimpleVID->StepDenominator;
3625 int64_t Addend = SimpleVID->Addend;
3626
3627 assert(StepNumerator != 0 && "Invalid step");
3628 bool Negate = false;
3629 int64_t SplatStepVal = StepNumerator;
3630 unsigned StepOpcode = ISD::MUL;
3631 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3632 // anyway as the shift of 63 won't fit in uimm5.
3633 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3634 isPowerOf2_64(std::abs(StepNumerator))) {
3635 Negate = StepNumerator < 0;
3636 StepOpcode = ISD::SHL;
3637 SplatStepVal = Log2_64(std::abs(StepNumerator));
3638 }
3639
3640 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3641 // threshold since it's the immediate value many RVV instructions accept.
3642 // There is no vmul.vi instruction so ensure the multiply constant can fit in
3643 // a single addi instruction.
3644 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3645 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3646 isPowerOf2_32(StepDenominator) &&
3647 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3648 MVT VIDVT =
3650 MVT VIDContainerVT =
3651 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3652 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3653 // Convert right out of the scalable type so we can use standard ISD
3654 // nodes for the rest of the computation. If we used scalable types with
3655 // these, we'd lose the fixed-length vector info and generate worse
3656 // vsetvli code.
3657 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3658 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3659 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3660 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3661 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3662 }
3663 if (StepDenominator != 1) {
3664 SDValue SplatStep =
3665 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3666 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3667 }
3668 if (Addend != 0 || Negate) {
3669 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3670 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3671 VID);
3672 }
3673 if (VT.isFloatingPoint()) {
3674 // TODO: Use vfwcvt to reduce register pressure.
3675 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3676 }
3677 return VID;
3678 }
3679 }
3680
3681 // For very small build_vectors, use a single scalar insert of a constant.
3682 // TODO: Base this on constant rematerialization cost, not size.
3683 const unsigned EltBitSize = VT.getScalarSizeInBits();
3684 if (VT.getSizeInBits() <= 32 &&
3685 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3686 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3687 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3688 "Unexpected sequence type");
3689 // If we can use the original VL with the modified element type, this
3690 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3691 // be moved into InsertVSETVLI?
3692 unsigned ViaVecLen =
3693 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3694 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3695
3696 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3697 uint64_t SplatValue = 0;
3698 // Construct the amalgamated value at this larger vector type.
3699 for (const auto &OpIdx : enumerate(Op->op_values())) {
3700 const auto &SeqV = OpIdx.value();
3701 if (!SeqV.isUndef())
3702 SplatValue |=
3703 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3704 }
3705
3706 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3707 // achieve better constant materialization.
3708 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3709 SplatValue = SignExtend64<32>(SplatValue);
3710
3711 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3712 DAG.getUNDEF(ViaVecVT),
3713 DAG.getConstant(SplatValue, DL, XLenVT),
3714 DAG.getVectorIdxConstant(0, DL));
3715 if (ViaVecLen != 1)
3716 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3717 MVT::getVectorVT(ViaIntVT, 1), Vec,
3718 DAG.getConstant(0, DL, XLenVT));
3719 return DAG.getBitcast(VT, Vec);
3720 }
3721
3722
3723 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3724 // when re-interpreted as a vector with a larger element type. For example,
3725 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3726 // could be instead splat as
3727 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3728 // TODO: This optimization could also work on non-constant splats, but it
3729 // would require bit-manipulation instructions to construct the splat value.
3730 SmallVector<SDValue> Sequence;
3731 const auto *BV = cast<BuildVectorSDNode>(Op);
3732 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3733 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3734 BV->getRepeatedSequence(Sequence) &&
3735 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3736 unsigned SeqLen = Sequence.size();
3737 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3738 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3739 ViaIntVT == MVT::i64) &&
3740 "Unexpected sequence type");
3741
3742 // If we can use the original VL with the modified element type, this
3743 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3744 // be moved into InsertVSETVLI?
3745 const unsigned RequiredVL = NumElts / SeqLen;
3746 const unsigned ViaVecLen =
3747 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3748 NumElts : RequiredVL;
3749 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3750
3751 unsigned EltIdx = 0;
3752 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3753 uint64_t SplatValue = 0;
3754 // Construct the amalgamated value which can be splatted as this larger
3755 // vector type.
3756 for (const auto &SeqV : Sequence) {
3757 if (!SeqV.isUndef())
3758 SplatValue |=
3759 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3760 EltIdx++;
3761 }
3762
3763 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3764 // achieve better constant materialization.
3765 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3766 SplatValue = SignExtend64<32>(SplatValue);
3767
3768 // Since we can't introduce illegal i64 types at this stage, we can only
3769 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3770 // way we can use RVV instructions to splat.
3771 assert((ViaIntVT.bitsLE(XLenVT) ||
3772 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3773 "Unexpected bitcast sequence");
3774 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3775 SDValue ViaVL =
3776 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3777 MVT ViaContainerVT =
3778 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3779 SDValue Splat =
3780 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3781 DAG.getUNDEF(ViaContainerVT),
3782 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3783 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3784 if (ViaVecLen != RequiredVL)
3785         Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3786                             MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3787 DAG.getConstant(0, DL, XLenVT));
3788 return DAG.getBitcast(VT, Splat);
3789 }
3790 }
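// Illustrative example of the amalgamation above (values chosen arbitrarily):
// for
//   v8i8 = build_vector 0x01, 0x02, 0x01, 0x02, 0x01, 0x02, 0x01, 0x02
// the repeated sequence is {0x01, 0x02}, so SeqLen = 2 and ViaIntVT = i16.
// The loop packs the sequence little-endian-wise into
//   SplatValue = 0x01 | (0x02 << 8) = 0x0201,
// so the whole build_vector becomes a splat of the i16 value 0x0201
// (RequiredVL = 4), which is then bitcast back to v8i8.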
3791
3792 // If the number of signbits allows, see if we can lower as a <N x i8>.
3793 // Our main goal here is to reduce LMUL (and thus work) required to
3794 // build the constant, but we will also narrow if the resulting
3795 // narrow vector is known to materialize cheaply.
3796 // TODO: We really should be costing the smaller vector. There are
3797 // profitable cases this misses.
3798 if (EltBitSize > 8 && VT.isInteger() &&
3799 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3800 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3801 if (EltBitSize - SignBits < 8) {
3802 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3803 DL, Op->ops());
3804 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3805 Source, DAG, Subtarget);
3806 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3807 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3808 }
3809 }
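// Illustrative example of the sign-bit narrowing above: for
//   v4i16 = build_vector -3, 2, 0, 1
// ComputeNumSignBits reports at least 14 sign bits per element, so
// EltBitSize - SignBits = 2 < 8. The constant is instead built as
//   v4i8 = build_vector -3, 2, 0, 1
// and widened back to v4i16 with VSEXT_VL, so the constant itself is
// materialized at half the vector width.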
3810
3811 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3812 return Res;
3813
3814 // For constant vectors, use generic constant pool lowering. Otherwise,
3815 // we'd have to materialize constants in GPRs just to move them into the
3816 // vector.
3817 return SDValue();
3818}
3819
3820 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3821                                  const RISCVSubtarget &Subtarget) {
3822 MVT VT = Op.getSimpleValueType();
3823 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3824
3825 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3826       ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3827     return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3828
3829 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3830
3831 SDLoc DL(Op);
3832 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3833
3834 MVT XLenVT = Subtarget.getXLenVT();
3835
3836 if (VT.getVectorElementType() == MVT::i1) {
3837 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3838 // vector type, we have a legal equivalently-sized i8 type, so we can use
3839 // that.
3840 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3841 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3842
3843 SDValue WideVec;
3844 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3845 // For a splat, perform a scalar truncate before creating the wider
3846 // vector.
3847 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3848 DAG.getConstant(1, DL, Splat.getValueType()));
3849 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3850 } else {
3851 SmallVector<SDValue, 8> Ops(Op->op_values());
3852 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3853 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3854 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3855 }
3856
3857 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3858 }
3859
3860 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3861 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3862 return Gather;
3863 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3864                                         : RISCVISD::VMV_V_X_VL;
3865     if (!VT.isFloatingPoint())
3866 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3867 Splat =
3868 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3869 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3870 }
3871
3872 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3873 return Res;
3874
3875 // If we're compiling for an exact VLEN value, we can split our work per
3876 // register in the register group.
3877 if (const auto VLen = Subtarget.getRealVLen();
3878 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3879 MVT ElemVT = VT.getVectorElementType();
3880 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3881 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3882 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3883 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3884 assert(M1VT == getLMUL1VT(M1VT));
3885
3886 // The following semantically builds up a fixed length concat_vector
3887 // of the component build_vectors. We eagerly lower to scalable and
3888 // insert_subvector here to avoid DAG combining it back to a large
3889 // build_vector.
3890 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3891 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3892 SDValue Vec = DAG.getUNDEF(ContainerVT);
3893 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3894 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3895 SDValue SubBV =
3896 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3897 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3898 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3899 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3900 DAG.getVectorIdxConstant(InsertIdx, DL));
3901 }
3902 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3903 }
3904
3905 // For m1 vectors, if we have non-undef values in both halves of our vector,
3906 // split the vector into low and high halves, build them separately, then
3907 // use a vselect to combine them. For long vectors, this cuts the critical
3908 // path of the vslide1down sequence in half, and gives us an opportunity
3909 // to special case each half independently. Note that we don't change the
3910 // length of the sub-vectors here, so if both fallback to the generic
3911 // vslide1down path, we should be able to fold the vselect into the final
3912 // vslidedown (for the undef tail) for the first half w/ masking.
3913 unsigned NumElts = VT.getVectorNumElements();
3914 unsigned NumUndefElts =
3915 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3916 unsigned NumDefElts = NumElts - NumUndefElts;
3917 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3918 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
3919 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3920 SmallVector<SDValue> MaskVals;
3921 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
3922 SubVecAOps.reserve(NumElts);
3923 SubVecBOps.reserve(NumElts);
3924 for (unsigned i = 0; i < NumElts; i++) {
3925 SDValue Elem = Op->getOperand(i);
3926 if (i < NumElts / 2) {
3927 SubVecAOps.push_back(Elem);
3928 SubVecBOps.push_back(UndefElem);
3929 } else {
3930 SubVecAOps.push_back(UndefElem);
3931 SubVecBOps.push_back(Elem);
3932 }
3933 bool SelectMaskVal = (i < NumElts / 2);
3934 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
3935 }
3936 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
3937 MaskVals.size() == NumElts);
3938
3939 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
3940 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
3941 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3942 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
3943 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
3944 }
3945
3946 // Cap the cost at a value linear to the number of elements in the vector.
3947 // The default lowering is to use the stack. The vector store + scalar loads
3948 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
3949 // being (at least) linear in LMUL. As a result, using the vslidedown
3950   // lowering for every element ends up being VL*LMUL.
3951 // TODO: Should we be directly costing the stack alternative? Doing so might
3952 // give us a more accurate upper bound.
3953 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3954
3955 // TODO: unify with TTI getSlideCost.
3956 InstructionCost PerSlideCost = 1;
3957 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3958 default: break;
3959   case RISCVII::VLMUL::LMUL_2:
3960     PerSlideCost = 2;
3961     break;
3962   case RISCVII::VLMUL::LMUL_4:
3963     PerSlideCost = 4;
3964     break;
3965   case RISCVII::VLMUL::LMUL_8:
3966     PerSlideCost = 8;
3967 break;
3968 }
3969
3970 // TODO: Should we be using the build instseq then cost + evaluate scheme
3971 // we use for integer constants here?
3972 unsigned UndefCount = 0;
3973 for (const SDValue &V : Op->ops()) {
3974 if (V.isUndef()) {
3975 UndefCount++;
3976 continue;
3977 }
3978 if (UndefCount) {
3979 LinearBudget -= PerSlideCost;
3980 UndefCount = 0;
3981 }
3982 LinearBudget -= PerSlideCost;
3983 }
3984 if (UndefCount) {
3985 LinearBudget -= PerSlideCost;
3986 }
3987
3988 if (LinearBudget < 0)
3989 return SDValue();
3990
3991 assert((!VT.isFloatingPoint() ||
3992 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3993 "Illegal type which will result in reserved encoding");
3994
3995 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3996
3997 SDValue Vec;
3998 UndefCount = 0;
3999 for (SDValue V : Op->ops()) {
4000 if (V.isUndef()) {
4001 UndefCount++;
4002 continue;
4003 }
4004
4005 // Start our sequence with a TA splat in the hopes that hardware is able to
4006 // recognize there's no dependency on the prior value of our temporary
4007 // register.
4008 if (!Vec) {
4009 Vec = DAG.getSplatVector(VT, DL, V);
4010 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4011 UndefCount = 0;
4012 continue;
4013 }
4014
4015 if (UndefCount) {
4016 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4017 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4018 Vec, Offset, Mask, VL, Policy);
4019 UndefCount = 0;
4020 }
4021 auto OpCode =
4022       VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4023     if (!VT.isFloatingPoint())
4024 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4025 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4026 V, Mask, VL);
4027 }
4028 if (UndefCount) {
4029 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4030 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4031 Vec, Offset, Mask, VL, Policy);
4032 }
4033 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4034}
4035
4036static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4037                                    SDValue Lo, SDValue Hi, SDValue VL,
4038                                    SelectionDAG &DAG) {
4039 if (!Passthru)
4040 Passthru = DAG.getUNDEF(VT);
4041 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4042 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4043 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4044 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4045 // node in order to try and match RVV vector/scalar instructions.
4046 if ((LoC >> 31) == HiC)
4047 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4048
4049 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4050 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4051 // vlmax vsetvli or vsetivli to change the VL.
4052 // FIXME: Support larger constants?
4053 // FIXME: Support non-constant VLs by saturating?
4054 if (LoC == HiC) {
4055 SDValue NewVL;
4056 if (isAllOnesConstant(VL) ||
4057 (isa<RegisterSDNode>(VL) &&
4058 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4059 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4060 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4061 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4062
4063 if (NewVL) {
4064 MVT InterVT =
4065 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4066 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4067 DAG.getUNDEF(InterVT), Lo, NewVL);
4068 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4069 }
4070 }
4071 }
4072
4073 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4074 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4075 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4076 Hi.getConstantOperandVal(1) == 31)
4077 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4078
4079 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4080 // even if it might be sign extended.
4081 if (Hi.isUndef())
4082 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4083
4084 // Fall back to a stack store and stride x0 vector load.
4085 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4086 Hi, VL);
4087}
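// Illustrative example for the LoC == HiC path above (RV32, values chosen
// arbitrarily): splatting the i64 constant 0x0000000500000005 with VL = 2
// splits into Lo = Hi = 5. Rather than the stack fallback, the lowering
// emits a vmv.v.x of 5 at EEW=32 with the VL doubled to 4; reinterpreted at
// EEW=64, the four i32 lanes of 5 form exactly two i64 lanes of
// 0x0000000500000005.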
4088
4089// Called by type legalization to handle splat of i64 on RV32.
4090// FIXME: We can optimize this when the type has sign or zero bits in one
4091// of the halves.
4092static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4093 SDValue Scalar, SDValue VL,
4094 SelectionDAG &DAG) {
4095 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4096 SDValue Lo, Hi;
4097 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4098 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4099}
4100
4101// This function lowers a splat of a scalar operand Splat with the vector
4102// length VL. It ensures the final sequence is type legal, which is useful when
4103// lowering a splat after type legalization.
4104static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4105 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4106 const RISCVSubtarget &Subtarget) {
4107 bool HasPassthru = Passthru && !Passthru.isUndef();
4108 if (!HasPassthru && !Passthru)
4109 Passthru = DAG.getUNDEF(VT);
4110 if (VT.isFloatingPoint())
4111 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4112
4113 MVT XLenVT = Subtarget.getXLenVT();
4114
4115 // Simplest case is that the operand needs to be promoted to XLenVT.
4116 if (Scalar.getValueType().bitsLE(XLenVT)) {
4117 // If the operand is a constant, sign extend to increase our chances
4118     // of being able to use a .vi instruction. ANY_EXTEND would become
4119     // a zero extend and the simm5 check in isel would fail.
4120 // FIXME: Should we ignore the upper bits in isel instead?
4121 unsigned ExtOpc =
4122 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4123 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4124 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4125 }
4126
4127 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4128 "Unexpected scalar for splat lowering!");
4129
4130 if (isOneConstant(VL) && isNullConstant(Scalar))
4131 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4132 DAG.getConstant(0, DL, XLenVT), VL);
4133
4134 // Otherwise use the more complicated splatting algorithm.
4135 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4136}
4137
4138// This function lowers an insert of a scalar operand Scalar into lane
4139// 0 of the vector regardless of the value of VL. The contents of the
4140// remaining lanes of the result vector are unspecified. VL is assumed
4141// to be non-zero.
4142 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4143                                  const SDLoc &DL, SelectionDAG &DAG,
4144 const RISCVSubtarget &Subtarget) {
4145 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4146
4147 const MVT XLenVT = Subtarget.getXLenVT();
4148 SDValue Passthru = DAG.getUNDEF(VT);
4149
4150 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4151 isNullConstant(Scalar.getOperand(1))) {
4152 SDValue ExtractedVal = Scalar.getOperand(0);
4153 // The element types must be the same.
4154 if (ExtractedVal.getValueType().getVectorElementType() ==
4155 VT.getVectorElementType()) {
4156 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4157 MVT ExtractedContainerVT = ExtractedVT;
4158 if (ExtractedContainerVT.isFixedLengthVector()) {
4159 ExtractedContainerVT = getContainerForFixedLengthVector(
4160 DAG, ExtractedContainerVT, Subtarget);
4161 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4162 ExtractedVal, DAG, Subtarget);
4163 }
4164 if (ExtractedContainerVT.bitsLE(VT))
4165 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4166 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4167 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4168 DAG.getVectorIdxConstant(0, DL));
4169 }
4170 }
4171
4172
4173 if (VT.isFloatingPoint())
4174 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4175 DAG.getUNDEF(VT), Scalar, VL);
4176
4177 // Avoid the tricky legalization cases by falling back to using the
4178 // splat code which already handles it gracefully.
4179 if (!Scalar.getValueType().bitsLE(XLenVT))
4180 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4181 DAG.getConstant(1, DL, XLenVT),
4182 VT, DL, DAG, Subtarget);
4183
4184 // If the operand is a constant, sign extend to increase our chances
4185   // of being able to use a .vi instruction. ANY_EXTEND would become
4186   // a zero extend and the simm5 check in isel would fail.
4187 // FIXME: Should we ignore the upper bits in isel instead?
4188 unsigned ExtOpc =
4189 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4190 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4191 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
4192 DAG.getUNDEF(VT), Scalar, VL);
4193}
4194
4195 // Is this a shuffle that extracts either the even or odd elements of a vector?
4196// That is, specifically, either (a) or (b) below.
4197// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4198// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4199// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4200// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4201 // Returns {Src Vector, Even Elements} on success
4202static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4203 SDValue V2, ArrayRef<int> Mask,
4204 const RISCVSubtarget &Subtarget) {
4205 // Need to be able to widen the vector.
4206 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4207 return false;
4208
4209   // Both inputs must be extracts.
4210 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4211 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4212 return false;
4213
4214 // Extracting from the same source.
4215 SDValue Src = V1.getOperand(0);
4216 if (Src != V2.getOperand(0))
4217 return false;
4218
4219 // Src needs to have twice the number of elements.
4220 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4221 return false;
4222
4223 // The extracts must extract the two halves of the source.
4224 if (V1.getConstantOperandVal(1) != 0 ||
4225 V2.getConstantOperandVal(1) != Mask.size())
4226 return false;
4227
4228 // First index must be the first even or odd element from V1.
4229 if (Mask[0] != 0 && Mask[0] != 1)
4230 return false;
4231
4232 // The others must increase by 2 each time.
4233 // TODO: Support undef elements?
4234 for (unsigned i = 1; i != Mask.size(); ++i)
4235 if (Mask[i] != Mask[i - 1] + 2)
4236 return false;
4237
4238 return true;
4239}
4240
4241/// Is this shuffle interleaving contiguous elements from one vector into the
4242/// even elements and contiguous elements from another vector into the odd
4243/// elements. \p EvenSrc will contain the element that should be in the first
4244/// even element. \p OddSrc will contain the element that should be in the first
4245/// odd element. These can be the first element in a source or the element half
4246/// way through the source.
4247static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4248 int &OddSrc, const RISCVSubtarget &Subtarget) {
4249 // We need to be able to widen elements to the next larger integer type.
4250 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4251 return false;
4252
4253 int Size = Mask.size();
4254 int NumElts = VT.getVectorNumElements();
4255 assert(Size == (int)NumElts && "Unexpected mask size");
4256
4257 SmallVector<unsigned, 2> StartIndexes;
4258 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4259 return false;
4260
4261 EvenSrc = StartIndexes[0];
4262 OddSrc = StartIndexes[1];
4263
4264 // One source should be low half of first vector.
4265 if (EvenSrc != 0 && OddSrc != 0)
4266 return false;
4267
4268   // Subvectors will be extracted either at the start of the two input
4269   // vectors, or at the start and middle of the first vector if it's a unary
4270   // interleave.
4271 // In both cases, HalfNumElts will be extracted.
4272 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4273 // we'll create an illegal extract_subvector.
4274 // FIXME: We could support other values using a slidedown first.
4275 int HalfNumElts = NumElts / 2;
4276 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4277}
4278
4279/// Match shuffles that concatenate two vectors, rotate the concatenation,
4280/// and then extract the original number of elements from the rotated result.
4281/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4282/// returned rotation amount is for a rotate right, where elements move from
4283/// higher elements to lower elements. \p LoSrc indicates the first source
4284/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4285/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4286/// 0 or 1 if a rotation is found.
4287///
4288/// NOTE: We talk about rotate to the right which matches how bit shift and
4289/// rotate instructions are described where LSBs are on the right, but LLVM IR
4290/// and the table below write vectors with the lowest elements on the left.
4291static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4292 int Size = Mask.size();
4293
4294 // We need to detect various ways of spelling a rotation:
4295 // [11, 12, 13, 14, 15, 0, 1, 2]
4296 // [-1, 12, 13, 14, -1, -1, 1, -1]
4297 // [-1, -1, -1, -1, -1, -1, 1, 2]
4298 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4299 // [-1, 4, 5, 6, -1, -1, 9, -1]
4300 // [-1, 4, 5, 6, -1, -1, -1, -1]
4301 int Rotation = 0;
4302 LoSrc = -1;
4303 HiSrc = -1;
4304 for (int i = 0; i != Size; ++i) {
4305 int M = Mask[i];
4306 if (M < 0)
4307 continue;
4308
4309 // Determine where a rotate vector would have started.
4310 int StartIdx = i - (M % Size);
4311 // The identity rotation isn't interesting, stop.
4312 if (StartIdx == 0)
4313 return -1;
4314
4315 // If we found the tail of a vector the rotation must be the missing
4316 // front. If we found the head of a vector, it must be how much of the
4317 // head.
4318 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4319
4320 if (Rotation == 0)
4321 Rotation = CandidateRotation;
4322 else if (Rotation != CandidateRotation)
4323 // The rotations don't match, so we can't match this mask.
4324 return -1;
4325
4326 // Compute which value this mask is pointing at.
4327 int MaskSrc = M < Size ? 0 : 1;
4328
4329 // Compute which of the two target values this index should be assigned to.
4330     // This reflects whether the high elements are remaining or the low elements
4331 // are remaining.
4332 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4333
4334 // Either set up this value if we've not encountered it before, or check
4335 // that it remains consistent.
4336 if (TargetSrc < 0)
4337 TargetSrc = MaskSrc;
4338 else if (TargetSrc != MaskSrc)
4339 // This may be a rotation, but it pulls from the inputs in some
4340 // unsupported interleaving.
4341 return -1;
4342 }
4343
4344 // Check that we successfully analyzed the mask, and normalize the results.
4345 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4346 assert((LoSrc >= 0 || HiSrc >= 0) &&
4347 "Failed to find a rotated input vector!");
4348
4349 return Rotation;
4350}
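// Illustrative trace of isElementRotate: for the single-source mask
//   [6, 7, 0, 1, 2, 3, 4, 5]          (Size = 8)
// index 0 maps to element 6, giving StartIdx = -6 and a candidate rotation of
// 6; index 2 maps to element 0, giving StartIdx = 2 and Size - StartIdx = 6 as
// well, so the mask is a rotate right by 6 with LoSrc = HiSrc = 0. The caller
// then lowers it as a vslidedown by 6 combined with a vslideup by
// NumElts - 6 = 2.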
4351
4352// Lower a deinterleave shuffle to vnsrl.
4353// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4354// -> [p, q, r, s] (EvenElts == false)
4355// VT is the type of the vector to return, <[vscale x ]n x ty>
4356// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4357 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4358                                        bool EvenElts,
4359 const RISCVSubtarget &Subtarget,
4360 SelectionDAG &DAG) {
4361 // The result is a vector of type <m x n x ty>
4362 MVT ContainerVT = VT;
4363 // Convert fixed vectors to scalable if needed
4364 if (ContainerVT.isFixedLengthVector()) {
4365 assert(Src.getSimpleValueType().isFixedLengthVector());
4366 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4367
4368 // The source is a vector of type <m x n*2 x ty>
4369 MVT SrcContainerVT =
4370         MVT::getVectorVT(ContainerVT.getVectorElementType(),
4371                          ContainerVT.getVectorElementCount() * 2);
4372 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4373 }
4374
4375 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4376
4377 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4378 // This also converts FP to int.
4379 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4380 MVT WideSrcContainerVT = MVT::getVectorVT(
4381 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4382 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4383
4384 // The integer version of the container type.
4385 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4386
4387 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4388 // the original element size.
4389 unsigned Shift = EvenElts ? 0 : EltBits;
4390 SDValue SplatShift = DAG.getNode(
4391 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4392 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4393 SDValue Res =
4394 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4395 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4396 // Cast back to FP if needed.
4397 Res = DAG.getBitcast(ContainerVT, Res);
4398
4399 if (VT.isFixedLengthVector())
4400 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4401 return Res;
4402}
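// Illustrative example of the vnsrl trick above: deinterleaving
//   v8i8 [a, p, b, q, c, r, d, s]
// first bitcasts the source to v4i16, where (little endian) each lane holds
// (odd << 8) | even, e.g. lane 0 = (p << 8) | a. A vnsrl with shift 0 keeps
// the low bytes and yields [a, b, c, d]; a vnsrl with shift EltBits = 8
// yields [p, q, r, s].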
4403
4404// Lower the following shuffle to vslidedown.
4405// a)
4406// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4407// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4408// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4409// b)
4410// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4411// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4412// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4413// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4414// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4415// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4416 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4417                                                SDValue V1, SDValue V2,
4418 ArrayRef<int> Mask,
4419 const RISCVSubtarget &Subtarget,
4420 SelectionDAG &DAG) {
4421 auto findNonEXTRACT_SUBVECTORParent =
4422 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4423 uint64_t Offset = 0;
4424 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4425 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4426 // a scalable vector. But we don't want to match the case.
4427 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4428 Offset += Parent.getConstantOperandVal(1);
4429 Parent = Parent.getOperand(0);
4430 }
4431 return std::make_pair(Parent, Offset);
4432 };
4433
4434 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4435 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4436
4437 // Extracting from the same source.
4438 SDValue Src = V1Src;
4439 if (Src != V2Src)
4440 return SDValue();
4441
4442 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4443 SmallVector<int, 16> NewMask(Mask);
4444 for (size_t i = 0; i != NewMask.size(); ++i) {
4445 if (NewMask[i] == -1)
4446 continue;
4447
4448 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4449 NewMask[i] = NewMask[i] + V1IndexOffset;
4450 } else {
4451 // Minus NewMask.size() is needed. Otherwise, the b case would be
4452 // <5,6,7,12> instead of <5,6,7,8>.
4453 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4454 }
4455 }
4456
4457 // First index must be known and non-zero. It will be used as the slidedown
4458 // amount.
4459 if (NewMask[0] <= 0)
4460 return SDValue();
4461
4462 // NewMask is also continuous.
4463 for (unsigned i = 1; i != NewMask.size(); ++i)
4464 if (NewMask[i - 1] + 1 != NewMask[i])
4465 return SDValue();
4466
4467 MVT XLenVT = Subtarget.getXLenVT();
4468 MVT SrcVT = Src.getSimpleValueType();
4469 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4470 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4471 SDValue Slidedown =
4472 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4473 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4474 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4475 return DAG.getNode(
4476       ISD::EXTRACT_SUBVECTOR, DL, VT,
4477       convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4478 DAG.getConstant(0, DL, XLenVT));
4479}
4480
4481// Because vslideup leaves the destination elements at the start intact, we can
4482// use it to perform shuffles that insert subvectors:
4483//
4484// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4485// ->
4486// vsetvli zero, 8, e8, mf2, ta, ma
4487// vslideup.vi v8, v9, 4
4488//
4489// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4490// ->
4491// vsetvli zero, 5, e8, mf2, tu, ma
4492 // vslideup.vi v8, v9, 2
4493 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4494                                              SDValue V1, SDValue V2,
4495 ArrayRef<int> Mask,
4496 const RISCVSubtarget &Subtarget,
4497 SelectionDAG &DAG) {
4498 unsigned NumElts = VT.getVectorNumElements();
4499 int NumSubElts, Index;
4500 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4501 Index))
4502 return SDValue();
4503
4504 bool OpsSwapped = Mask[Index] < (int)NumElts;
4505 SDValue InPlace = OpsSwapped ? V2 : V1;
4506 SDValue ToInsert = OpsSwapped ? V1 : V2;
4507
4508 MVT XLenVT = Subtarget.getXLenVT();
4509 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4510 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4511 // We slide up by the index that the subvector is being inserted at, and set
4512   // VL to the index + the number of elements being inserted.
4513   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4514   // If we're adding a suffix to the in-place vector, i.e. inserting right
4515 // up to the very end of it, then we don't actually care about the tail.
4516 if (NumSubElts + Index >= (int)NumElts)
4517 Policy |= RISCVII::TAIL_AGNOSTIC;
4518
4519 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4520 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4521 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4522
4523 SDValue Res;
4524 // If we're inserting into the lowest elements, use a tail undisturbed
4525 // vmv.v.v.
4526 if (Index == 0)
4527 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4528 VL);
4529 else
4530 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4531 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4532 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4533}
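// Illustrative trace for the second example in the comment above this
// function: mask <0, 1, 8, 9, 10, 5, 6, 7> on two v8i8 operands matches
// isInsertSubvectorMask with Index = 2 and NumSubElts = 3, so VL is set to
// Index + NumSubElts = 5. Because 5 < 8 the inserted elements do not reach
// the end of the vector, the tail-undisturbed policy is kept, and the
// result is the "vsetvli zero, 5, e8, mf2, tu, ma; vslideup.vi v8, v9, 2"
// sequence shown above.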
4534
4535/// Match v(f)slide1up/down idioms. These operations involve sliding
4536/// N-1 elements to make room for an inserted scalar at one end.
4537 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4538                                             SDValue V1, SDValue V2,
4539 ArrayRef<int> Mask,
4540 const RISCVSubtarget &Subtarget,
4541 SelectionDAG &DAG) {
4542 bool OpsSwapped = false;
4543 if (!isa<BuildVectorSDNode>(V1)) {
4544 if (!isa<BuildVectorSDNode>(V2))
4545 return SDValue();
4546 std::swap(V1, V2);
4547 OpsSwapped = true;
4548 }
4549 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4550 if (!Splat)
4551 return SDValue();
4552
4553 // Return true if the mask could describe a slide of Mask.size() - 1
4554 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4555 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4556 const unsigned S = (Offset > 0) ? 0 : -Offset;
4557 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4558 for (unsigned i = S; i != E; ++i)
4559 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4560 return false;
4561 return true;
4562 };
4563
4564 const unsigned NumElts = VT.getVectorNumElements();
4565 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4566 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4567 return SDValue();
4568
4569 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4570 // Inserted lane must come from splat, undef scalar is legal but not profitable.
4571 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4572 return SDValue();
4573
4574 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4575 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4576 auto OpCode = IsVSlidedown ?
4577     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4578     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4579   if (!VT.isFloatingPoint())
4580 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4581 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4582 DAG.getUNDEF(ContainerVT),
4583 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4584 Splat, TrueMask, VL);
4585 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4586}
4587
4588// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4589// to create an interleaved vector of <[vscale x] n*2 x ty>.
4590// This requires that the size of ty is less than the subtarget's maximum ELEN.
4591 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4592                                      const SDLoc &DL, SelectionDAG &DAG,
4593 const RISCVSubtarget &Subtarget) {
4594 MVT VecVT = EvenV.getSimpleValueType();
4595 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4596 // Convert fixed vectors to scalable if needed
4597 if (VecContainerVT.isFixedLengthVector()) {
4598 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4599 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4600 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4601 }
4602
4603 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4604
4605 // We're working with a vector of the same size as the resulting
4606 // interleaved vector, but with half the number of elements and
4607 // twice the SEW (Hence the restriction on not using the maximum
4608 // ELEN)
4609 MVT WideVT =
4610       MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4611                        VecVT.getVectorElementCount());
4612 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4613 if (WideContainerVT.isFixedLengthVector())
4614 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4615
4616 // Bitcast the input vectors to integers in case they are FP
4617 VecContainerVT = VecContainerVT.changeTypeToInteger();
4618 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4619 OddV = DAG.getBitcast(VecContainerVT, OddV);
4620
4621 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4622 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4623
4624 SDValue Interleaved;
4625 if (Subtarget.hasStdExtZvbb()) {
4626 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4627 SDValue OffsetVec =
4628 DAG.getSplatVector(VecContainerVT, DL,
4629 DAG.getConstant(VecVT.getScalarSizeInBits(), DL,
4630 Subtarget.getXLenVT()));
4631 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4632 OffsetVec, Passthru, Mask, VL);
4633 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4634 Interleaved, EvenV, Passthru, Mask, VL);
4635 } else {
4636 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4637 // vwaddu.vv
4638 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4639 OddV, Passthru, Mask, VL);
4640
4641     // Then get OddV * (2^VecVT.getScalarSizeInBits() - 1)
4642 SDValue AllOnesVec = DAG.getSplatVector(
4643 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4644 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4645 OddV, AllOnesVec, Passthru, Mask, VL);
4646
4647 // Add the two together so we get
4648 // (OddV * 0xff...ff) + (OddV + EvenV)
4649 // = (OddV * 0x100...00) + EvenV
4650 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4651     // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4652 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4653 Interleaved, OddsMul, Passthru, Mask, VL);
4654 }
4655
4656   // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
4657 MVT ResultContainerVT = MVT::getVectorVT(
4658 VecVT.getVectorElementType(), // Make sure to use original type
4659 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4660 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4661
4662 // Convert back to a fixed vector if needed
4663 MVT ResultVT =
4664       MVT::getVectorVT(VecVT.getVectorElementType(),
4665                        VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4666   if (ResultVT.isFixedLengthVector())
4667 Interleaved =
4668 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4669
4670 return Interleaved;
4671}
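// Illustrative arithmetic check for the non-Zvbb path above, with SEW = 8:
// for an even element e and an odd element o,
//   vwaddu.vv  : (o + e)                         (widened to 16 bits)
//   vwmaccu.vx : (o + e) + o * 0xff = o * 256 + e = (o << 8) | e,
// e.g. o = 3, e = 5 gives 3 * 255 + (3 + 5) = 773 = 0x0305, which is the
// interleaved pair when viewed as two adjacent i8 lanes.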
4672
4673// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4674// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4675 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4676                                       SelectionDAG &DAG,
4677 const RISCVSubtarget &Subtarget) {
4678 SDLoc DL(SVN);
4679 MVT VT = SVN->getSimpleValueType(0);
4680 SDValue V = SVN->getOperand(0);
4681 unsigned NumElts = VT.getVectorNumElements();
4682
4683 assert(VT.getVectorElementType() == MVT::i1);
4684
4686 SVN->getMask().size()) ||
4687 !SVN->getOperand(1).isUndef())
4688 return SDValue();
4689
4690 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
4691 EVT ViaVT = EVT::getVectorVT(
4692 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
4693 EVT ViaBitVT =
4694 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
4695
4696 // If we don't have zvbb or the larger element type > ELEN, the operation will
4697 // be illegal.
4698   if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
4699                                                                ViaVT) ||
4700 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
4701 return SDValue();
4702
4703 // If the bit vector doesn't fit exactly into the larger element type, we need
4704 // to insert it into the larger vector and then shift up the reversed bits
4705 // afterwards to get rid of the gap introduced.
4706 if (ViaEltSize > NumElts)
4707 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
4708 V, DAG.getVectorIdxConstant(0, DL));
4709
4710 SDValue Res =
4711 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
4712
4713 // Shift up the reversed bits if the vector didn't exactly fit into the larger
4714 // element type.
4715 if (ViaEltSize > NumElts)
4716 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
4717 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
4718
4719 Res = DAG.getBitcast(ViaBitVT, Res);
4720
4721 if (ViaEltSize > NumElts)
4722 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
4723 DAG.getVectorIdxConstant(0, DL));
4724 return Res;
4725}
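// Illustrative example for the non-exact-fit case above: reversing a v4i1
// uses ViaEltSize = 8, so the four mask bits are inserted into the low half
// of a v8i1, bitcast to v1i8 and bit-reversed. That leaves the reversed bits
// in the upper nibble, so the SRL by ViaEltSize - NumElts = 4 shifts them
// back down before extracting the v4i1 result.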
4726
4728 SelectionDAG &DAG,
4729 const RISCVSubtarget &Subtarget,
4730 MVT &RotateVT, unsigned &RotateAmt) {
4731 SDLoc DL(SVN);
4732
4733 EVT VT = SVN->getValueType(0);
4734 unsigned NumElts = VT.getVectorNumElements();
4735 unsigned EltSizeInBits = VT.getScalarSizeInBits();
4736 unsigned NumSubElts;
4737 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
4738 NumElts, NumSubElts, RotateAmt))
4739 return false;
4740 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
4741 NumElts / NumSubElts);
4742
4743 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
4744 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
4745}
4746
4747// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
4748// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
4749// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
4750 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
4751                                            SelectionDAG &DAG,
4752 const RISCVSubtarget &Subtarget) {
4753 SDLoc DL(SVN);
4754
4755 EVT VT = SVN->getValueType(0);
4756 unsigned RotateAmt;
4757 MVT RotateVT;
4758 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4759 return SDValue();
4760
4761 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
4762
4763 SDValue Rotate;
4764 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
4765 // so canonicalize to vrev8.
4766 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
4767 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
4768 else
4769 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
4770 DAG.getConstant(RotateAmt, DL, RotateVT));
4771
4772 return DAG.getBitcast(VT, Rotate);
4773}
4774
4775// If compiling with an exactly known VLEN, see if we can split a
4776// shuffle on m2 or larger into a small number of m1 sized shuffles
4777 // which write each destination register exactly once.
4778 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
4779                                             SelectionDAG &DAG,
4780 const RISCVSubtarget &Subtarget) {
4781 SDLoc DL(SVN);
4782 MVT VT = SVN->getSimpleValueType(0);
4783 SDValue V1 = SVN->getOperand(0);
4784 SDValue V2 = SVN->getOperand(1);
4785 ArrayRef<int> Mask = SVN->getMask();
4786 unsigned NumElts = VT.getVectorNumElements();
4787
4788 // If we don't know exact data layout, not much we can do. If this
4789 // is already m1 or smaller, no point in splitting further.
4790 const auto VLen = Subtarget.getRealVLen();
4791 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
4792 return SDValue();
4793
4794 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
4795 // expansion for.
4796 unsigned RotateAmt;
4797 MVT RotateVT;
4798 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
4799 return SDValue();
4800
4801 MVT ElemVT = VT.getVectorElementType();
4802 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4803 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
4804
4806 OutMasks(VRegsPerSrc, {-1, {}});
4807
4808 // Check if our mask can be done as a 1-to-1 mapping from source
4809 // to destination registers in the group without needing to
4810 // write each destination more than once.
4811 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
4812 int DstVecIdx = DstIdx / ElemsPerVReg;
4813 int DstSubIdx = DstIdx % ElemsPerVReg;
4814 int SrcIdx = Mask[DstIdx];
4815 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
4816 continue;
4817 int SrcVecIdx = SrcIdx / ElemsPerVReg;
4818 int SrcSubIdx = SrcIdx % ElemsPerVReg;
4819 if (OutMasks[DstVecIdx].first == -1)
4820 OutMasks[DstVecIdx].first = SrcVecIdx;
4821 if (OutMasks[DstVecIdx].first != SrcVecIdx)
4822 // Note: This case could easily be handled by keeping track of a chain
4823 // of source values and generating two element shuffles below. This is
4824 // less an implementation question, and more a profitability one.
4825 return SDValue();
4826
4827 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
4828 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
4829 }
4830
4831 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4832 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4833 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4834 assert(M1VT == getLMUL1VT(M1VT));
4835 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4836 SDValue Vec = DAG.getUNDEF(ContainerVT);
4837 // The following semantically builds up a fixed length concat_vector
4838 // of the component shuffle_vectors. We eagerly lower to scalable here
4839 // to avoid DAG combining it back to a large shuffle_vector again.
4840 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4841 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
4842 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
4843 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
4844 if (SrcVecIdx == -1)
4845 continue;
4846 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
4847 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
4848 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
4849 DAG.getVectorIdxConstant(ExtractIdx, DL));
4850 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
4851 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
4852 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
4853 unsigned InsertIdx = DstVecIdx * NumOpElts;
4854 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
4855 DAG.getVectorIdxConstant(InsertIdx, DL));
4856 }
4857 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4858}
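// Illustrative example of the per-register splitting above: with an exact
// VLEN of 128, a v8i32 shuffle (an m2 register group) has ElemsPerVReg = 4
// and VRegsPerSrc = 2. For the mask <4, 5, 6, 7, 0, 1, 2, 3>, destination
// register 0 reads only from source register 1 and destination register 1
// only from source register 0, so OutMasks becomes {1, <0,1,2,3>} and
// {0, <0,1,2,3>} and the m2 shuffle is emitted as two independent m1
// shuffles (here plain whole-register copies), each destination written
// exactly once.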
4859
4860 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4861                                    const RISCVSubtarget &Subtarget) {
4862 SDValue V1 = Op.getOperand(0);
4863 SDValue V2 = Op.getOperand(1);
4864 SDLoc DL(Op);
4865 MVT XLenVT = Subtarget.getXLenVT();
4866 MVT VT = Op.getSimpleValueType();
4867 unsigned NumElts = VT.getVectorNumElements();
4868 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4869
4870 if (VT.getVectorElementType() == MVT::i1) {
4871 // Lower to a vror.vi of a larger element type if possible before we promote
4872 // i1s to i8s.
4873 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4874 return V;
4875 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
4876 return V;
4877
4878 // Promote i1 shuffle to i8 shuffle.
4879 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
4880 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
4881 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
4882 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
4883 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
4884 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
4885 ISD::SETNE);
4886 }
4887
4888 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4889
4890 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4891
4892 if (SVN->isSplat()) {
4893 const int Lane = SVN->getSplatIndex();
4894 if (Lane >= 0) {
4895 MVT SVT = VT.getVectorElementType();
4896
4897 // Turn splatted vector load into a strided load with an X0 stride.
4898 SDValue V = V1;
4899 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
4900 // with undef.
4901 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
4902 int Offset = Lane;
4903 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
4904 int OpElements =
4905 V.getOperand(0).getSimpleValueType().getVectorNumElements();
4906 V = V.getOperand(Offset / OpElements);
4907 Offset %= OpElements;
4908 }
4909
4910 // We need to ensure the load isn't atomic or volatile.
4911 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
4912 auto *Ld = cast<LoadSDNode>(V);
4913 Offset *= SVT.getStoreSize();
4914 SDValue NewAddr = DAG.getMemBasePlusOffset(
4915 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
4916
4917 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
4918 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
4919 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
4920 SDValue IntID =
4921 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
4922 SDValue Ops[] = {Ld->getChain(),
4923 IntID,
4924 DAG.getUNDEF(ContainerVT),
4925 NewAddr,
4926 DAG.getRegister(RISCV::X0, XLenVT),
4927 VL};
4928 SDValue NewLoad = DAG.getMemIntrinsicNode(
4929 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
4930             DAG.getMachineFunction().getMachineMemOperand(
4931                 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
4932 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
4933 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
4934 }
4935
4936 // Otherwise use a scalar load and splat. This will give the best
4937 // opportunity to fold a splat into the operation. ISel can turn it into
4938 // the x0 strided load if we aren't able to fold away the select.
4939 if (SVT.isFloatingPoint())
4940 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
4941 Ld->getPointerInfo().getWithOffset(Offset),
4942 Ld->getOriginalAlign(),
4943 Ld->getMemOperand()->getFlags());
4944 else
4945 V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
4946 Ld->getPointerInfo().getWithOffset(Offset), SVT,
4947 Ld->getOriginalAlign(),
4948 Ld->getMemOperand()->getFlags());
4949         DAG.makeEquivalentMemoryOrdering(Ld, V);
4950
4951 unsigned Opc =
4952             VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4953         SDValue Splat =
4954 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
4955 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4956 }
4957
4958 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
4959 assert(Lane < (int)NumElts && "Unexpected lane!");
4960 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
4961 V1, DAG.getConstant(Lane, DL, XLenVT),
4962 DAG.getUNDEF(ContainerVT), TrueMask, VL);
4963 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
4964 }
4965 }
4966
4967 // For exact VLEN m2 or greater, try to split to m1 operations if we
4968 // can split cleanly.
4969 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
4970 return V;
4971
4972 ArrayRef<int> Mask = SVN->getMask();
4973
4974 if (SDValue V =
4975 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
4976 return V;
4977
4978 if (SDValue V =
4979 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
4980 return V;
4981
4982 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
4983 // available.
4984 if (Subtarget.hasStdExtZvkb())
4985 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
4986 return V;
4987
4988 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
4989 // be undef which can be handled with a single SLIDEDOWN/UP.
4990 int LoSrc, HiSrc;
4991 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
4992 if (Rotation > 0) {
4993 SDValue LoV, HiV;
4994 if (LoSrc >= 0) {
4995 LoV = LoSrc == 0 ? V1 : V2;
4996 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
4997 }
4998 if (HiSrc >= 0) {
4999 HiV = HiSrc == 0 ? V1 : V2;
5000 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5001 }
5002
5003 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5004 // to slide LoV up by (NumElts - Rotation).
5005 unsigned InvRotate = NumElts - Rotation;
5006
5007 SDValue Res = DAG.getUNDEF(ContainerVT);
5008 if (HiV) {
5009       // Even though we could use a smaller VL, don't, to avoid a vsetivli
5010       // toggle.
5011 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5012 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5013 }
5014 if (LoV)
5015 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5016 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5017                         RISCVII::TAIL_AGNOSTIC);
5018
5019 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5020 }
5021
5022 // If this is a deinterleave and we can widen the vector, then we can use
5023 // vnsrl to deinterleave.
5024 if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5025 return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5026 Subtarget, DAG);
5027 }
5028
5029 if (SDValue V =
5030 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5031 return V;
5032
5033 // Detect an interleave shuffle and lower to
5034 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5035 int EvenSrc, OddSrc;
5036 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5037 // Extract the halves of the vectors.
5038 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5039
5040 int Size = Mask.size();
5041 SDValue EvenV, OddV;
5042 assert(EvenSrc >= 0 && "Undef source?");
5043 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5044 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5045 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5046
5047 assert(OddSrc >= 0 && "Undef source?");
5048 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5049 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5050 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5051
5052 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5053 }
5054
5055
5056 // Handle any remaining single source shuffles
5057 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5058 if (V2.isUndef()) {
5059 // We might be able to express the shuffle as a bitrotate. But even if we
5060 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5061 // shifts and a vor will have a higher throughput than a vrgather.
5062 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5063 return V;
5064
5065 if (VT.getScalarSizeInBits() == 8 &&
5066 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5067 // On such a vector we're unable to use i8 as the index type.
5068 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5069 // may involve vector splitting if we're already at LMUL=8, or our
5070 // user-supplied maximum fixed-length LMUL.
5071 return SDValue();
5072 }
5073
5074 // Base case for the two operand recursion below - handle the worst case
5075 // single source shuffle.
5076 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5077 MVT IndexVT = VT.changeTypeToInteger();
5078 // Since we can't introduce illegal index types at this stage, use i16 and
5079 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5080 // than XLenVT.
5081 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5082 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5083 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5084 }
5085
5086 // If the mask allows, we can do all the index computation in 16 bits. This
5087 // requires less work and less register pressure at high LMUL, and creates
5088 // smaller constants which may be cheaper to materialize.
5089 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5090 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5091 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5092 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5093 }
5094
5095 MVT IndexContainerVT =
5096 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5097
5098 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5099 SmallVector<SDValue> GatherIndicesLHS;
5100 for (int MaskIndex : Mask) {
5101 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5102 GatherIndicesLHS.push_back(IsLHSIndex
5103 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5104 : DAG.getUNDEF(XLenVT));
5105 }
5106 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5107 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5108 Subtarget);
5109 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5110 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5111 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5112 }
5113
5114 // By default we preserve the original operand order, and use a mask to
5115 // select LHS as true and RHS as false. However, since RVV vector selects may
5116 // feature splats but only on the LHS, we may choose to invert our mask and
5117 // instead select between RHS and LHS.
5118 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5119
5120 // Detect shuffles which can be re-expressed as vector selects; these are
5121 // shuffles in which each element in the destination is taken from an element
5122 // at the corresponding index in either source vectors.
5123 bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
5124 int MaskIndex = MaskIdx.value();
5125 return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
5126 });
5127 if (IsSelect) {
5128 // Now construct the mask that will be used by the vselect operation.
5129 SmallVector<SDValue> MaskVals;
5130 for (int MaskIndex : Mask) {
5131 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
5132 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5133 }
5134
5135 if (SwapOps)
5136 std::swap(V1, V2);
5137
5138 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5139 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5140 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5141 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5142 }
5143
5144 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5145 // merged with a second vrgather.
5146 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5147 SmallVector<SDValue> MaskVals;
5148
5149 // Now construct the mask that will be used by the blended vrgather operation.
5150   // Construct the appropriate indices into each vector.
5151 for (int MaskIndex : Mask) {
5152 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5153 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5154 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5155 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5156 ? MaskIndex : -1);
5157 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5158 }
5159
5160 if (SwapOps) {
5161 std::swap(V1, V2);
5162 std::swap(ShuffleMaskLHS, ShuffleMaskRHS);
5163 }
5164
5165 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5166 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5167 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5168
5169 // Recursively invoke lowering for each operand if we had two
5170 // independent single source shuffles, and then combine the result via a
5171 // vselect. Note that the vselect will likely be folded back into the
5172 // second permute (vrgather, or other) by the post-isel combine.
5173 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5174 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5175 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5176}
5177
5178 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5179 // Support splats for any type. These should type legalize well.
5180 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5181 return true;
5182
5183 // Only support legal VTs for other shuffles for now.
5184 if (!isTypeLegal(VT))
5185 return false;
5186
5187 MVT SVT = VT.getSimpleVT();
5188
5189 // Not for i1 vectors.
5190 if (SVT.getScalarType() == MVT::i1)
5191 return false;
5192
5193 int Dummy1, Dummy2;
5194 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5195 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5196}
5197
5198// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5199// the exponent.
5200SDValue
5201RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5202 SelectionDAG &DAG) const {
5203 MVT VT = Op.getSimpleValueType();
5204 unsigned EltSize = VT.getScalarSizeInBits();
5205 SDValue Src = Op.getOperand(0);
5206 SDLoc DL(Op);
5207 MVT ContainerVT = VT;
5208
5209 SDValue Mask, VL;
5210 if (Op->isVPOpcode()) {
5211 Mask = Op.getOperand(1);
5212 if (VT.isFixedLengthVector())
5213 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5214 Subtarget);
5215 VL = Op.getOperand(2);
5216 }
5217
5218 // We choose an FP type that can represent the value if possible. Otherwise, we
5219 // use a round-towards-zero conversion so the exponent of the result is correct.
5220 // TODO: Use f16 for i8 when possible?
5221 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5222 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5223 FloatEltVT = MVT::f32;
5224 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5225
5226 // Legal types should have been checked in the RISCVTargetLowering
5227 // constructor.
5228 // TODO: Splitting may make sense in some cases.
5229 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5230 "Expected legal float type!");
5231
5232 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5233 // The trailing zero count is equal to log2 of this single bit value.
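// For example, for a 32-bit element x = 24 (0b11000), x & -x = 8; converting
// 8 to f32 gives a biased exponent of 130, so cttz = 130 - 127 = 3.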
5234 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5235 SDValue Neg = DAG.getNegative(Src, DL, VT);
5236 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5237 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5238 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5239 Src, Mask, VL);
5240 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5241 }
5242
5243 // We have a legal FP type, convert to it.
5244 SDValue FloatVal;
5245 if (FloatVT.bitsGT(VT)) {
5246 if (Op->isVPOpcode())
5247 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5248 else
5249 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5250 } else {
5251 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5252 if (VT.isFixedLengthVector()) {
5253 ContainerVT = getContainerForFixedLengthVector(VT);
5254 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5255 }
5256 if (!Op->isVPOpcode())
5257 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5258 SDValue RTZRM =
5259 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5260 MVT ContainerFloatVT =
5261 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5262 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5263 Src, Mask, RTZRM, VL);
5264 if (VT.isFixedLengthVector())
5265 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5266 }
5267 // Bitcast to integer and shift the exponent to the LSB.
5268 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5269 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5270 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5271
5272 SDValue Exp;
5273 // Restore to the original type. Truncating after the SRL lets isel generate a vnsrl.
5274 if (Op->isVPOpcode()) {
5275 Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
5276 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5277 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5278 } else {
5279 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5280 DAG.getConstant(ShiftAmt, DL, IntVT));
5281 if (IntVT.bitsLT(VT))
5282 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5283 else if (IntVT.bitsGT(VT))
5284 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5285 }
5286
5287 // The exponent contains log2 of the value in biased form.
5288 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5289 // For trailing zeros, we just need to subtract the bias.
5290 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5291 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5292 DAG.getConstant(ExponentBias, DL, VT));
5293 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5294 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5295 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5296
5297 // For leading zeros, we need to remove the bias and convert from log2 to
5298 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
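// Continuing the example above: for a 32-bit element x = 8 the biased exponent
// is 130 and Adjust = 127 + 31 = 158, so ctlz = 158 - 130 = 28, matching the
// 28 leading zeros of 8 in a 32-bit value.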
5299 unsigned Adjust = ExponentBias + (EltSize - 1);
5300 SDValue Res;
5301 if (Op->isVPOpcode())
5302 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5303 Mask, VL);
5304 else
5305 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5306
5307 // With a zero input the above result equals Adjust, which is greater than
5308 // EltSize. Hence, we can clamp with min(Res, EltSize) for CTLZ.
5309 if (Op.getOpcode() == ISD::CTLZ)
5310 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5311 else if (Op.getOpcode() == ISD::VP_CTLZ)
5312 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5313 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5314 return Res;
5315}
5316
5317// While RVV has alignment restrictions, we should always be able to load as a
5318// legal equivalently-sized byte-typed vector instead. This method is
5319 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5320// the load is already correctly-aligned, it returns SDValue().
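// For example, an underaligned load of v4i32 is re-expressed as a load of
// v16i8 (same total size) followed by a bitcast back to v4i32.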
5321SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5322 SelectionDAG &DAG) const {
5323 auto *Load = cast<LoadSDNode>(Op);
5324 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5325
5326 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5327 Load->getMemoryVT(),
5328 *Load->getMemOperand()))
5329 return SDValue();
5330
5331 SDLoc DL(Op);
5332 MVT VT = Op.getSimpleValueType();
5333 unsigned EltSizeBits = VT.getScalarSizeInBits();
5334 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5335 "Unexpected unaligned RVV load type");
5336 MVT NewVT =
5337 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5338 assert(NewVT.isValid() &&
5339 "Expecting equally-sized RVV vector types to be legal");
5340 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5341 Load->getPointerInfo(), Load->getOriginalAlign(),
5342 Load->getMemOperand()->getFlags());
5343 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5344}
5345
5346// While RVV has alignment restrictions, we should always be able to store as a
5347// legal equivalently-sized byte-typed vector instead. This method is
5348 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5349// returns SDValue() if the store is already correctly aligned.
5350SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5351 SelectionDAG &DAG) const {
5352 auto *Store = cast<StoreSDNode>(Op);
5353 assert(Store && Store->getValue().getValueType().isVector() &&
5354 "Expected vector store");
5355
5356 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5357 Store->getMemoryVT(),
5358 *Store->getMemOperand()))
5359 return SDValue();
5360
5361 SDLoc DL(Op);
5362 SDValue StoredVal = Store->getValue();
5363 MVT VT = StoredVal.getSimpleValueType();
5364 unsigned EltSizeBits = VT.getScalarSizeInBits();
5365 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5366 "Unexpected unaligned RVV store type");
5367 MVT NewVT =
5368 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5369 assert(NewVT.isValid() &&
5370 "Expecting equally-sized RVV vector types to be legal");
5371 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5372 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5373 Store->getPointerInfo(), Store->getOriginalAlign(),
5374 Store->getMemOperand()->getFlags());
5375}
5376
5377 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5378 const RISCVSubtarget &Subtarget) {
5379 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5380
5381 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5382
5383 // All simm32 constants should be handled by isel.
5384 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2, making
5385 // this check redundant, but small immediates are common so checking them
5386 // first here improves compile time.
5387 if (isInt<32>(Imm))
5388 return Op;
5389
5390 // We only need to cost the immediate, if constant pool lowering is enabled.
5391 if (!Subtarget.useConstantPoolForLargeInts())
5392 return Op;
5393
5394 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5395 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5396 return Op;
5397
5398 // Optimizations below are disabled for opt size. If we're optimizing for
5399 // size, use a constant pool.
5400 if (DAG.shouldOptForSize())
5401 return SDValue();
5402
5403 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
5404 // do that if it will avoid a constant pool, though it requires an extra
5405 // temporary register.
5406 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5407 // the low and high 32 bits are the same and bits 31 and 63 are set.
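// For example, with Zba, 0x8000123480001234 can be built by materializing
// 0x80001234 into X and computing (ADD_UW X, (SLLI X, 32)): ADD_UW adds the
// zero-extended low 32 bits of X to the shifted copy.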
5408 unsigned ShiftAmt, AddOpc;
5409 RISCVMatInt::InstSeq SeqLo =
5410 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5411 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5412 return Op;
5413
5414 return SDValue();
5415}
5416
5417 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5418 const RISCVSubtarget &Subtarget) {
5419 SDLoc dl(Op);
5420 AtomicOrdering FenceOrdering =
5421 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5422 SyncScope::ID FenceSSID =
5423 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5424
5425 if (Subtarget.hasStdExtZtso()) {
5426 // The only fence that needs an instruction is a sequentially-consistent
5427 // cross-thread fence.
5428 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5429 FenceSSID == SyncScope::System)
5430 return Op;
5431
5432 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5433 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5434 }
5435
5436 // singlethread fences only synchronize with signal handlers on the same
5437 // thread and thus only need to preserve instruction order, not actually
5438 // enforce memory ordering.
5439 if (FenceSSID == SyncScope::SingleThread)
5440 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5441 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
5442
5443 return Op;
5444}
5445
5446 static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG) {
5447 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5448 "Unexpected custom legalisation");
5449
5450 // With Zbb, we can widen to i64 and smin/smax with INT32_MAX/MIN.
5451 bool IsAdd = Op.getOpcode() == ISD::SADDSAT;
5452 SDLoc DL(Op);
5453 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5454 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5455 SDValue Result =
5456 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5457
5458 APInt MinVal = APInt::getSignedMinValue(32).sext(64);
5459 APInt MaxVal = APInt::getSignedMaxValue(32).sext(64);
5460 SDValue SatMin = DAG.getConstant(MinVal, DL, MVT::i64);
5461 SDValue SatMax = DAG.getConstant(MaxVal, DL, MVT::i64);
5462 Result = DAG.getNode(ISD::SMIN, DL, MVT::i64, Result, SatMax);
5463 Result = DAG.getNode(ISD::SMAX, DL, MVT::i64, Result, SatMin);
5464 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5465}
5466
5467 static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
5468 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5469 "Unexpected custom legalisation");
5470
5471 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
5472 // sign extend allows overflow of the lower 32 bits to be detected on
5473 // the promoted size.
5474 SDLoc DL(Op);
5475 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5476 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5477 SDValue WideOp = DAG.getNode(Op.getOpcode(), DL, MVT::i64, LHS, RHS);
5478 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5479}
5480
5481// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
5482 static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
5483 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5484 "Unexpected custom legalisation");
5485 if (isa<ConstantSDNode>(Op.getOperand(1)))
5486 return SDValue();
5487
5488 bool IsAdd = Op.getOpcode() == ISD::SADDO;
5489 SDLoc DL(Op);
5490 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5491 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5492 SDValue WideOp =
5493 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
5494 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
5495 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
5496 DAG.getValueType(MVT::i32));
5497 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
5498 ISD::SETNE);
5499 return DAG.getMergeValues({Res, Ovf}, DL);
5500}
5501
5502// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
5503 static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
5504 assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
5505 "Unexpected custom legalisation");
5506 SDLoc DL(Op);
5507 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
5508 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
5509 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
5510 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
5511 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Mul,
5512 DAG.getValueType(MVT::i32));
5513 SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), Mul, SExt,
5514 ISD::SETNE);
5515 return DAG.getMergeValues({Res, Ovf}, DL);
5516}
5517
5518SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
5519 SelectionDAG &DAG) const {
5520 SDLoc DL(Op);
5521 MVT VT = Op.getSimpleValueType();
5522 MVT XLenVT = Subtarget.getXLenVT();
5523 unsigned Check = Op.getConstantOperandVal(1);
5524 unsigned TDCMask = 0;
5525 if (Check & fcSNan)
5526 TDCMask |= RISCV::FPMASK_Signaling_NaN;
5527 if (Check & fcQNan)
5528 TDCMask |= RISCV::FPMASK_Quiet_NaN;
5529 if (Check & fcPosInf)
5530 TDCMask |= RISCV::FPMASK_Positive_Infinity;
5531 if (Check & fcNegInf)
5532 TDCMask |= RISCV::FPMASK_Negative_Infinity;
5533 if (Check & fcPosNormal)
5534 TDCMask |= RISCV::FPMASK_Positive_Normal;
5535 if (Check & fcNegNormal)
5536 TDCMask |= RISCV::FPMASK_Negative_Normal;
5537 if (Check & fcPosSubnormal)
5538 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
5539 if (Check & fcNegSubnormal)
5540 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
5541 if (Check & fcPosZero)
5542 TDCMask |= RISCV::FPMASK_Positive_Zero;
5543 if (Check & fcNegZero)
5544 TDCMask |= RISCV::FPMASK_Negative_Zero;
5545
5546 bool IsOneBitMask = isPowerOf2_32(TDCMask);
5547
5548 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
5549
5550 if (VT.isVector()) {
5551 SDValue Op0 = Op.getOperand(0);
5552 MVT VT0 = Op.getOperand(0).getSimpleValueType();
5553
5554 if (VT.isScalableVector()) {
5555 MVT DstVT = VT0.changeVectorElementTypeToInteger();
5556 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
5557 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5558 Mask = Op.getOperand(2);
5559 VL = Op.getOperand(3);
5560 }
5561 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
5562 VL, Op->getFlags());
5563 if (IsOneBitMask)
5564 return DAG.getSetCC(DL, VT, FPCLASS,
5565 DAG.getConstant(TDCMask, DL, DstVT),
5566 ISD::SETEQ);
5567 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
5568 DAG.getConstant(TDCMask, DL, DstVT));
5569 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
5570 ISD::SETNE);
5571 }
5572
5573 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
5574 MVT ContainerVT = getContainerForFixedLengthVector(VT);
5575 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
5576 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
5577 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
5578 Mask = Op.getOperand(2);
5579 MVT MaskContainerVT =
5580 getContainerForFixedLengthVector(Mask.getSimpleValueType());
5581 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
5582 VL = Op.getOperand(3);
5583 }
5584 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
5585
5586 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
5587 Mask, VL, Op->getFlags());
5588
5589 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5590 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
5591 if (IsOneBitMask) {
5592 SDValue VMSEQ =
5593 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5594 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
5595 DAG.getUNDEF(ContainerVT), Mask, VL});
5596 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
5597 }
5598 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
5599 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
5600
5601 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
5602 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
5603 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
5604
5605 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
5606 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
5607 DAG.getUNDEF(ContainerVT), Mask, VL});
5608 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
5609 }
5610
5611 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
5612 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
5613 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
5614 ISD::SETNE);
5615 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
5616}
5617
5618 // Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
5619 // operations propagate NaNs.
5620 static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
5621 const RISCVSubtarget &Subtarget) {
5622 SDLoc DL(Op);
5623 MVT VT = Op.getSimpleValueType();
5624
5625 SDValue X = Op.getOperand(0);
5626 SDValue Y = Op.getOperand(1);
5627
5628 if (!VT.isVector()) {
5629 MVT XLenVT = Subtarget.getXLenVT();
5630
5631 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
5632 // ensures that when one input is a nan, the other will also be a nan
5633 // allowing the nan to propagate. If both inputs are nan, this will swap the
5634 // inputs which is harmless.
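// For example, fmaximum(NaN, 2.0): X is a nan, so NewY becomes X (NaN) while
// NewX stays NaN, and fmax(NaN, NaN) then yields the required NaN result.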
5635
5636 SDValue NewY = Y;
5637 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
5638 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
5639 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
5640 }
5641
5642 SDValue NewX = X;
5643 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
5644 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
5645 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
5646 }
5647
5648 unsigned Opc =
5649 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
5650 return DAG.getNode(Opc, DL, VT, NewX, NewY);
5651 }
5652
5653 // Check for known-never-NaN inputs before converting fixed vectors to scalable.
5654 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
5655 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
5656
5657 MVT ContainerVT = VT;
5658 if (VT.isFixedLengthVector()) {
5659 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5660 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
5661 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
5662 }
5663
5664 SDValue Mask, VL;
5665 if (Op->isVPOpcode()) {
5666 Mask = Op.getOperand(2);
5667 if (VT.isFixedLengthVector())
5668 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5669 Subtarget);
5670 VL = Op.getOperand(3);
5671 } else {
5672 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5673 }
5674
5675 SDValue NewY = Y;
5676 if (!XIsNeverNan) {
5677 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5678 {X, X, DAG.getCondCode(ISD::SETOEQ),
5679 DAG.getUNDEF(ContainerVT), Mask, VL});
5680 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
5681 DAG.getUNDEF(ContainerVT), VL);
5682 }
5683
5684 SDValue NewX = X;
5685 if (!YIsNeverNan) {
5686 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
5687 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
5688 DAG.getUNDEF(ContainerVT), Mask, VL});
5689 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
5690 DAG.getUNDEF(ContainerVT), VL);
5691 }
5692
5693 unsigned Opc =
5694 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
5695 ? RISCVISD::VFMAX_VL
5696 : RISCVISD::VFMIN_VL;
5697 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
5698 DAG.getUNDEF(ContainerVT), Mask, VL);
5699 if (VT.isFixedLengthVector())
5700 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
5701 return Res;
5702}
5703
5704/// Get a RISC-V target specified VL op for a given SDNode.
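/// For example, both ISD::ADD and ISD::VP_ADD map to RISCVISD::ADD_VL.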
5705static unsigned getRISCVVLOp(SDValue Op) {
5706#define OP_CASE(NODE) \
5707 case ISD::NODE: \
5708 return RISCVISD::NODE##_VL;
5709#define VP_CASE(NODE) \
5710 case ISD::VP_##NODE: \
5711 return RISCVISD::NODE##_VL;
5712 // clang-format off
5713 switch (Op.getOpcode()) {
5714 default:
5715 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
5716 OP_CASE(ADD)
5717 OP_CASE(SUB)
5718 OP_CASE(MUL)
5719 OP_CASE(MULHS)
5720 OP_CASE(MULHU)
5721 OP_CASE(SDIV)
5722 OP_CASE(SREM)
5723 OP_CASE(UDIV)
5724 OP_CASE(UREM)
5725 OP_CASE(SHL)
5726 OP_CASE(SRA)
5727 OP_CASE(SRL)
5728 OP_CASE(ROTL)
5729 OP_CASE(ROTR)
5730 OP_CASE(BSWAP)
5731 OP_CASE(CTTZ)
5732 OP_CASE(CTLZ)
5733 OP_CASE(CTPOP)
5734 OP_CASE(BITREVERSE)
5735 OP_CASE(SADDSAT)
5736 OP_CASE(UADDSAT)
5737 OP_CASE(SSUBSAT)
5738 OP_CASE(USUBSAT)
5739 OP_CASE(AVGFLOORU)
5740 OP_CASE(AVGCEILU)
5741 OP_CASE(FADD)
5742 OP_CASE(FSUB)
5743 OP_CASE(FMUL)
5744 OP_CASE(FDIV)
5745 OP_CASE(FNEG)
5746 OP_CASE(FABS)
5747 OP_CASE(FSQRT)
5748 OP_CASE(SMIN)
5749 OP_CASE(SMAX)
5750 OP_CASE(UMIN)
5751 OP_CASE(UMAX)
5752 OP_CASE(STRICT_FADD)
5753 OP_CASE(STRICT_FSUB)
5754 OP_CASE(STRICT_FMUL)
5755 OP_CASE(STRICT_FDIV)
5756 OP_CASE(STRICT_FSQRT)
5757 VP_CASE(ADD) // VP_ADD
5758 VP_CASE(SUB) // VP_SUB
5759 VP_CASE(MUL) // VP_MUL
5760 VP_CASE(SDIV) // VP_SDIV
5761 VP_CASE(SREM) // VP_SREM
5762 VP_CASE(UDIV) // VP_UDIV
5763 VP_CASE(UREM) // VP_UREM
5764 VP_CASE(SHL) // VP_SHL
5765 VP_CASE(FADD) // VP_FADD
5766 VP_CASE(FSUB) // VP_FSUB
5767 VP_CASE(FMUL) // VP_FMUL
5768 VP_CASE(FDIV) // VP_FDIV
5769 VP_CASE(FNEG) // VP_FNEG
5770 VP_CASE(FABS) // VP_FABS
5771 VP_CASE(SMIN) // VP_SMIN
5772 VP_CASE(SMAX) // VP_SMAX
5773 VP_CASE(UMIN) // VP_UMIN
5774 VP_CASE(UMAX) // VP_UMAX
5775 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
5776 VP_CASE(SETCC) // VP_SETCC
5777 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
5778 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
5779 VP_CASE(BITREVERSE) // VP_BITREVERSE
5780 VP_CASE(SADDSAT) // VP_SADDSAT
5781 VP_CASE(UADDSAT) // VP_UADDSAT
5782 VP_CASE(SSUBSAT) // VP_SSUBSAT
5783 VP_CASE(USUBSAT) // VP_USUBSAT
5784 VP_CASE(BSWAP) // VP_BSWAP
5785 VP_CASE(CTLZ) // VP_CTLZ
5786 VP_CASE(CTTZ) // VP_CTTZ
5787 VP_CASE(CTPOP) // VP_CTPOP
5788 case ISD::CTLZ_ZERO_UNDEF:
5789 case ISD::VP_CTLZ_ZERO_UNDEF:
5790 return RISCVISD::CTLZ_VL;
5791 case ISD::CTTZ_ZERO_UNDEF:
5792 case ISD::VP_CTTZ_ZERO_UNDEF:
5793 return RISCVISD::CTTZ_VL;
5794 case ISD::FMA:
5795 case ISD::VP_FMA:
5796 return RISCVISD::VFMADD_VL;
5797 case ISD::STRICT_FMA:
5798 return RISCVISD::STRICT_VFMADD_VL;
5799 case ISD::AND:
5800 case ISD::VP_AND:
5801 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5802 return RISCVISD::VMAND_VL;
5803 return RISCVISD::AND_VL;
5804 case ISD::OR:
5805 case ISD::VP_OR:
5806 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5807 return RISCVISD::VMOR_VL;
5808 return RISCVISD::OR_VL;
5809 case ISD::XOR:
5810 case ISD::VP_XOR:
5811 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
5812 return RISCVISD::VMXOR_VL;
5813 return RISCVISD::XOR_VL;
5814 case ISD::VP_SELECT:
5815 case ISD::VP_MERGE:
5816 return RISCVISD::VMERGE_VL;
5817 case ISD::VP_ASHR:
5818 return RISCVISD::SRA_VL;
5819 case ISD::VP_LSHR:
5820 return RISCVISD::SRL_VL;
5821 case ISD::VP_SQRT:
5822 return RISCVISD::FSQRT_VL;
5823 case ISD::VP_SIGN_EXTEND:
5824 return RISCVISD::VSEXT_VL;
5825 case ISD::VP_ZERO_EXTEND:
5826 return RISCVISD::VZEXT_VL;
5827 case ISD::VP_FP_TO_SINT:
5828 return RISCVISD::VFCVT_RTZ_X_F_VL;
5829 case ISD::VP_FP_TO_UINT:
5830 return RISCVISD::VFCVT_RTZ_XU_F_VL;
5831 case ISD::FMINNUM:
5832 case ISD::VP_FMINNUM:
5833 return RISCVISD::VFMIN_VL;
5834 case ISD::FMAXNUM:
5835 case ISD::VP_FMAXNUM:
5836 return RISCVISD::VFMAX_VL;
5837 case ISD::LRINT:
5838 case ISD::VP_LRINT:
5839 case ISD::LLRINT:
5840 case ISD::VP_LLRINT:
5841 return RISCVISD::VFCVT_X_F_VL;
5842 }
5843 // clang-format on
5844#undef OP_CASE
5845#undef VP_CASE
5846}
5847
5848/// Return true if a RISC-V target specified op has a merge operand.
5849static bool hasMergeOp(unsigned Opcode) {
5850 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5852 "not a RISC-V target specific op");
5854 126 &&
5857 21 &&
5858 "adding target specific op should update this function");
5859 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
5860 return true;
5861 if (Opcode == RISCVISD::FCOPYSIGN_VL)
5862 return true;
5863 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
5864 return true;
5865 if (Opcode == RISCVISD::SETCC_VL)
5866 return true;
5867 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
5868 return true;
5869 if (Opcode == RISCVISD::VMERGE_VL)
5870 return true;
5871 return false;
5872}
5873
5874/// Return true if a RISC-V target specified op has a mask operand.
5875static bool hasMaskOp(unsigned Opcode) {
5876 assert(Opcode > RISCVISD::FIRST_NUMBER &&
5878 "not a RISC-V target specific op");
5880 126 &&
5883 21 &&
5884 "adding target specific op should update this function");
5885 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
5886 return true;
5887 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
5888 return true;
5889 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
5891 return true;
5892 return false;
5893}
5894
5895 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
5896 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5897 SDLoc DL(Op);
5898
5899 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5900 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5901
5902 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5903 if (!Op.getOperand(j).getValueType().isVector()) {
5904 LoOperands[j] = Op.getOperand(j);
5905 HiOperands[j] = Op.getOperand(j);
5906 continue;
5907 }
5908 std::tie(LoOperands[j], HiOperands[j]) =
5909 DAG.SplitVector(Op.getOperand(j), DL);
5910 }
5911
5912 SDValue LoRes =
5913 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5914 SDValue HiRes =
5915 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5916
5917 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5918}
5919
5920 static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
5921 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
5922 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
5923 SDLoc DL(Op);
5924
5925 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5926 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5927
5928 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5929 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
5930 std::tie(LoOperands[j], HiOperands[j]) =
5931 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
5932 continue;
5933 }
5934 if (!Op.getOperand(j).getValueType().isVector()) {
5935 LoOperands[j] = Op.getOperand(j);
5936 HiOperands[j] = Op.getOperand(j);
5937 continue;
5938 }
5939 std::tie(LoOperands[j], HiOperands[j]) =
5940 DAG.SplitVector(Op.getOperand(j), DL);
5941 }
5942
5943 SDValue LoRes =
5944 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
5945 SDValue HiRes =
5946 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
5947
5948 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
5949}
5950
5951 static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
5952 SDLoc DL(Op);
5953
5954 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
5955 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
5956 auto [EVLLo, EVLHi] =
5957 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
5958
5959 SDValue ResLo =
5960 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5961 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
5962 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
5963 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
5964}
5965
5966 static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
5967
5968 assert(Op->isStrictFPOpcode());
5969
5970 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
5971
5972 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
5973 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
5974
5975 SDLoc DL(Op);
5976
5977 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
5978 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
5979
5980 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
5981 if (!Op.getOperand(j).getValueType().isVector()) {
5982 LoOperands[j] = Op.getOperand(j);
5983 HiOperands[j] = Op.getOperand(j);
5984 continue;
5985 }
5986 std::tie(LoOperands[j], HiOperands[j]) =
5987 DAG.SplitVector(Op.getOperand(j), DL);
5988 }
5989
5990 SDValue LoRes =
5991 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
5992 HiOperands[0] = LoRes.getValue(1);
5993 SDValue HiRes =
5994 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
5995
5996 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
5997 LoRes.getValue(0), HiRes.getValue(0));
5998 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
5999}
6000
6001 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6002 SelectionDAG &DAG) const {
6003 switch (Op.getOpcode()) {
6004 default:
6005 report_fatal_error("unimplemented operand");
6006 case ISD::ATOMIC_FENCE:
6007 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6008 case ISD::GlobalAddress:
6009 return lowerGlobalAddress(Op, DAG);
6010 case ISD::BlockAddress:
6011 return lowerBlockAddress(Op, DAG);
6012 case ISD::ConstantPool:
6013 return lowerConstantPool(Op, DAG);
6014 case ISD::JumpTable:
6015 return lowerJumpTable(Op, DAG);
6016 case ISD::GlobalTLSAddress:
6017 return lowerGlobalTLSAddress(Op, DAG);
6018 case ISD::Constant:
6019 return lowerConstant(Op, DAG, Subtarget);
6020 case ISD::SELECT:
6021 return lowerSELECT(Op, DAG);
6022 case ISD::BRCOND:
6023 return lowerBRCOND(Op, DAG);
6024 case ISD::VASTART:
6025 return lowerVASTART(Op, DAG);
6026 case ISD::FRAMEADDR:
6027 return lowerFRAMEADDR(Op, DAG);
6028 case ISD::RETURNADDR:
6029 return lowerRETURNADDR(Op, DAG);
6030 case ISD::SADDO:
6031 case ISD::SSUBO:
6032 return lowerSADDO_SSUBO(Op, DAG);
6033 case ISD::SMULO:
6034 return lowerSMULO(Op, DAG);
6035 case ISD::SHL_PARTS:
6036 return lowerShiftLeftParts(Op, DAG);
6037 case ISD::SRA_PARTS:
6038 return lowerShiftRightParts(Op, DAG, true);
6039 case ISD::SRL_PARTS:
6040 return lowerShiftRightParts(Op, DAG, false);
6041 case ISD::ROTL:
6042 case ISD::ROTR:
6043 if (Op.getValueType().isFixedLengthVector()) {
6044 assert(Subtarget.hasStdExtZvkb());
6045 return lowerToScalableOp(Op, DAG);
6046 }
6047 assert(Subtarget.hasVendorXTHeadBb() &&
6048 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6049 "Unexpected custom legalization");
6050 // XTHeadBb only supports rotate by constant.
6051 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6052 return SDValue();
6053 return Op;
6054 case ISD::BITCAST: {
6055 SDLoc DL(Op);
6056 EVT VT = Op.getValueType();
6057 SDValue Op0 = Op.getOperand(0);
6058 EVT Op0VT = Op0.getValueType();
6059 MVT XLenVT = Subtarget.getXLenVT();
6060 if (VT == MVT::f16 && Op0VT == MVT::i16 &&
6061 Subtarget.hasStdExtZfhminOrZhinxmin()) {
6062 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6063 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
6064 return FPConv;
6065 }
6066 if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
6067 Subtarget.hasStdExtZfbfmin()) {
6068 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6069 SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
6070 return FPConv;
6071 }
6072 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6073 Subtarget.hasStdExtFOrZfinx()) {
6074 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6075 SDValue FPConv =
6076 DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6077 return FPConv;
6078 }
6079 if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32) {
6080 SDValue Lo, Hi;
6081 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6082 SDValue RetReg =
6083 DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6084 return RetReg;
6085 }
6086
6087 // Consider other scalar<->scalar casts as legal if the types are legal.
6088 // Otherwise expand them.
6089 if (!VT.isVector() && !Op0VT.isVector()) {
6090 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6091 return Op;
6092 return SDValue();
6093 }
6094
6095 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6096 "Unexpected types");
6097
6098 if (VT.isFixedLengthVector()) {
6099 // We can handle fixed length vector bitcasts with a simple replacement
6100 // in isel.
6101 if (Op0VT.isFixedLengthVector())
6102 return Op;
6103 // When bitcasting from scalar to fixed-length vector, insert the scalar
6104 // into a one-element vector of the result type, and perform a vector
6105 // bitcast.
6106 if (!Op0VT.isVector()) {
6107 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6108 if (!isTypeLegal(BVT))
6109 return SDValue();
6110 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6111 DAG.getUNDEF(BVT), Op0,
6112 DAG.getVectorIdxConstant(0, DL)));
6113 }
6114 return SDValue();
6115 }
6116 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6117 // thus: bitcast the vector to a one-element vector type whose element type
6118 // is the same as the result type, and extract the first element.
6119 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6120 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6121 if (!isTypeLegal(BVT))
6122 return SDValue();
6123 SDValue BVec = DAG.getBitcast(BVT, Op0);
6124 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6125 DAG.getVectorIdxConstant(0, DL));
6126 }
6127 return SDValue();
6128 }
6129 case ISD::INTRINSIC_WO_CHAIN:
6130 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6131 case ISD::INTRINSIC_W_CHAIN:
6132 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6133 case ISD::INTRINSIC_VOID:
6134 return LowerINTRINSIC_VOID(Op, DAG);
6135 case ISD::IS_FPCLASS:
6136 return LowerIS_FPCLASS(Op, DAG);
6137 case ISD::BITREVERSE: {
6138 MVT VT = Op.getSimpleValueType();
6139 if (VT.isFixedLengthVector()) {
6140 assert(Subtarget.hasStdExtZvbb());
6141 return lowerToScalableOp(Op, DAG);
6142 }
6143 SDLoc DL(Op);
6144 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6145 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6146 // Expand bitreverse to a bswap(rev8) followed by brev8.
6147 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6148 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6149 }
6150 case ISD::TRUNCATE:
6151 // Only custom-lower vector truncates
6152 if (!Op.getSimpleValueType().isVector())
6153 return Op;
6154 return lowerVectorTruncLike(Op, DAG);
6155 case ISD::ANY_EXTEND:
6156 case ISD::ZERO_EXTEND:
6157 if (Op.getOperand(0).getValueType().isVector() &&
6158 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6159 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6160 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6161 case ISD::SIGN_EXTEND:
6162 if (Op.getOperand(0).getValueType().isVector() &&
6163 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6164 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6165 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6166 case ISD::SPLAT_VECTOR_PARTS:
6167 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6168 case ISD::INSERT_VECTOR_ELT:
6169 return lowerINSERT_VECTOR_ELT(Op, DAG);
6170 case ISD::EXTRACT_VECTOR_ELT:
6171 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6172 case ISD::SCALAR_TO_VECTOR: {
6173 MVT VT = Op.getSimpleValueType();
6174 SDLoc DL(Op);
6175 SDValue Scalar = Op.getOperand(0);
6176 if (VT.getVectorElementType() == MVT::i1) {
6177 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6178 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6179 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6180 }
6181 MVT ContainerVT = VT;
6182 if (VT.isFixedLengthVector())
6183 ContainerVT = getContainerForFixedLengthVector(VT);
6184 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6185 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6186 SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6187 DAG.getUNDEF(ContainerVT), Scalar, VL);
6188 if (VT.isFixedLengthVector())
6189 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6190 return V;
6191 }
6192 case ISD::VSCALE: {
6193 MVT XLenVT = Subtarget.getXLenVT();
6194 MVT VT = Op.getSimpleValueType();
6195 SDLoc DL(Op);
6196 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6197 // We define our scalable vector types for lmul=1 to use a 64 bit known
6198 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6199 // vscale as VLENB / 8.
6200 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6201 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6202 report_fatal_error("Support for VLEN==32 is incomplete.");
6203 // We assume VLENB is a multiple of 8. We manually choose the best shift
6204 // here because SimplifyDemandedBits isn't always able to simplify it.
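// For example, (vscale x 4) = (VLENB / 8) * 4 = VLENB >> 1, while
// (vscale x 24) = VLENB * 3 since 24 / 8 = 3.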
6205 uint64_t Val = Op.getConstantOperandVal(0);
6206 if (isPowerOf2_64(Val)) {
6207 uint64_t Log2 = Log2_64(Val);
6208 if (Log2 < 3)
6209 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6210 DAG.getConstant(3 - Log2, DL, VT));
6211 else if (Log2 > 3)
6212 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6213 DAG.getConstant(Log2 - 3, DL, XLenVT));
6214 } else if ((Val % 8) == 0) {
6215 // If the multiplier is a multiple of 8, scale it down to avoid needing
6216 // to shift the VLENB value.
6217 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6218 DAG.getConstant(Val / 8, DL, XLenVT));
6219 } else {
6220 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6221 DAG.getConstant(3, DL, XLenVT));
6222 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6223 DAG.getConstant(Val, DL, XLenVT));
6224 }
6225 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6226 }
6227 case ISD::FPOWI: {
6228 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6229 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6230 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6231 Op.getOperand(1).getValueType() == MVT::i32) {
6232 SDLoc DL(Op);
6233 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6234 SDValue Powi =
6235 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6236 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6237 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6238 }
6239 return SDValue();
6240 }
6241 case ISD::FMAXIMUM:
6242 case ISD::FMINIMUM:
6243 if (Op.getValueType() == MVT::nxv32f16 &&
6244 (Subtarget.hasVInstructionsF16Minimal() &&
6245 !Subtarget.hasVInstructionsF16()))
6246 return SplitVectorOp(Op, DAG);
6247 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6248 case ISD::FP_EXTEND: {
6249 SDLoc DL(Op);
6250 EVT VT = Op.getValueType();
6251 SDValue Op0 = Op.getOperand(0);
6252 EVT Op0VT = Op0.getValueType();
6253 if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
6254 return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6255 if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
6256 SDValue FloatVal =
6257 DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
6258 return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
6259 }
6260
6261 if (!Op.getValueType().isVector())
6262 return Op;
6263 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6264 }
6265 case ISD::FP_ROUND: {
6266 SDLoc DL(Op);
6267 EVT VT = Op.getValueType();
6268 SDValue Op0 = Op.getOperand(0);
6269 EVT Op0VT = Op0.getValueType();
6270 if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
6271 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
6272 if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
6273 Subtarget.hasStdExtDOrZdinx()) {
6274 SDValue FloatVal =
6275 DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
6276 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6277 return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
6278 }
6279
6280 if (!Op.getValueType().isVector())
6281 return Op;
6282 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6283 }
6284 case ISD::STRICT_FP_ROUND:
6285 case ISD::STRICT_FP_EXTEND:
6286 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6287 case ISD::SINT_TO_FP:
6288 case ISD::UINT_TO_FP:
6289 if (Op.getValueType().isVector() &&
6290 Op.getValueType().getScalarType() == MVT::f16 &&
6291 (Subtarget.hasVInstructionsF16Minimal() &&
6292 !Subtarget.hasVInstructionsF16())) {
6293 if (Op.getValueType() == MVT::nxv32f16)
6294 return SplitVectorOp(Op, DAG);
6295 // int -> f32
6296 SDLoc DL(Op);
6297 MVT NVT =
6298 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6299 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6300 // f32 -> f16
6301 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6302 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6303 }
6304 [[fallthrough]];
6305 case ISD::FP_TO_SINT:
6306 case ISD::FP_TO_UINT:
6307 if (SDValue Op1 = Op.getOperand(0);
6308 Op1.getValueType().isVector() &&
6309 Op1.getValueType().getScalarType() == MVT::f16 &&
6310 (Subtarget.hasVInstructionsF16Minimal() &&
6311 !Subtarget.hasVInstructionsF16())) {
6312 if (Op1.getValueType() == MVT::nxv32f16)
6313 return SplitVectorOp(Op, DAG);
6314 // f16 -> f32
6315 SDLoc DL(Op);
6316 MVT NVT = MVT::getVectorVT(MVT::f32,
6317 Op1.getValueType().getVectorElementCount());
6318 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6319 // f32 -> int
6320 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6321 }
6322 [[fallthrough]];
6323 case ISD::STRICT_FP_TO_SINT:
6324 case ISD::STRICT_FP_TO_UINT:
6325 case ISD::STRICT_SINT_TO_FP:
6326 case ISD::STRICT_UINT_TO_FP: {
6327 // RVV can only do fp<->int conversions to types half/double the size as
6328 // the source. We custom-lower any conversions that do two hops into
6329 // sequences.
6330 MVT VT = Op.getSimpleValueType();
6331 if (!VT.isVector())
6332 return Op;
6333 SDLoc DL(Op);
6334 bool IsStrict = Op->isStrictFPOpcode();
6335 SDValue Src = Op.getOperand(0 + IsStrict);
6336 MVT EltVT = VT.getVectorElementType();
6337 MVT SrcVT = Src.getSimpleValueType();
6338 MVT SrcEltVT = SrcVT.getVectorElementType();
6339 unsigned EltSize = EltVT.getSizeInBits();
6340 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6341 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6342 "Unexpected vector element types");
6343
6344 bool IsInt2FP = SrcEltVT.isInteger();
6345 // Widening conversions
6346 if (EltSize > (2 * SrcEltSize)) {
6347 if (IsInt2FP) {
6348 // Do a regular integer sign/zero extension then convert to float.
6349 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6350 VT.getVectorElementCount());
6351 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6352 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6353 ? ISD::ZERO_EXTEND
6354 : ISD::SIGN_EXTEND;
6355 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6356 if (IsStrict)
6357 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6358 Op.getOperand(0), Ext);
6359 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6360 }
6361 // FP2Int
6362 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6363 // Do one doubling fp_extend then complete the operation by converting
6364 // to int.
6365 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6366 if (IsStrict) {
6367 auto [FExt, Chain] =
6368 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6369 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6370 }
6371 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6372 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6373 }
6374
6375 // Narrowing conversions
6376 if (SrcEltSize > (2 * EltSize)) {
6377 if (IsInt2FP) {
6378 // One narrowing int_to_fp, then an fp_round.
6379 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6380 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6381 if (IsStrict) {
6382 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6383 DAG.getVTList(InterimFVT, MVT::Other),
6384 Op.getOperand(0), Src);
6385 SDValue Chain = Int2FP.getValue(1);
6386 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6387 }
6388 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6389 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6390 }
6391 // FP2Int
6392 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6393 // representable by the integer, the result is poison.
6394 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6395 VT.getVectorElementCount());
6396 if (IsStrict) {
6397 SDValue FP2Int =
6398 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6399 Op.getOperand(0), Src);
6400 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6401 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6402 }
6403 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6404 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6405 }
6406
6407 // Scalable vectors can exit here. Patterns will handle equally-sized
6408 // conversions halving/doubling ones.
6409 if (!VT.isFixedLengthVector())
6410 return Op;
6411
6412 // For fixed-length vectors we lower to a custom "VL" node.
6413 unsigned RVVOpc = 0;
6414 switch (Op.getOpcode()) {
6415 default:
6416 llvm_unreachable("Impossible opcode");
6417 case ISD::FP_TO_SINT:
6418 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6419 break;
6420 case ISD::FP_TO_UINT:
6421 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6422 break;
6423 case ISD::SINT_TO_FP:
6424 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6425 break;
6426 case ISD::UINT_TO_FP:
6427 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6428 break;
6429 case ISD::STRICT_FP_TO_SINT:
6430 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6431 break;
6432 case ISD::STRICT_FP_TO_UINT:
6433 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6434 break;
6435 case ISD::STRICT_SINT_TO_FP:
6436 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
6437 break;
6438 case ISD::STRICT_UINT_TO_FP:
6439 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
6440 break;
6441 }
6442
6443 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6444 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
6445 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
6446 "Expected same element count");
6447
6448 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6449
6450 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
6451 if (IsStrict) {
6452 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
6453 Op.getOperand(0), Src, Mask, VL);
6454 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
6455 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
6456 }
6457 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
6458 return convertFromScalableVector(VT, Src, DAG, Subtarget);
6459 }
6460 case ISD::FP_TO_SINT_SAT:
6461 case ISD::FP_TO_UINT_SAT:
6462 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
6463 case ISD::FP_TO_BF16: {
6464 // Custom lower to ensure the libcall return is passed in an FPR on hard
6465 // float ABIs.
6466 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
6467 SDLoc DL(Op);
6468 MakeLibCallOptions CallOptions;
6469 RTLIB::Libcall LC =
6470 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
6471 SDValue Res =
6472 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6473 if (Subtarget.is64Bit() && !RV64LegalI32)
6474 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6475 return DAG.getBitcast(MVT::i32, Res);
6476 }
6477 case ISD::BF16_TO_FP: {
6478 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
6479 MVT VT = Op.getSimpleValueType();
6480 SDLoc DL(Op);
6481 Op = DAG.getNode(
6482 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
6483 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
6484 SDValue Res = Subtarget.is64Bit()
6485 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
6486 : DAG.getBitcast(MVT::f32, Op);
6487 // fp_extend if the target VT is bigger than f32.
6488 if (VT != MVT::f32)
6489 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
6490 return Res;
6491 }
6492 case ISD::FP_TO_FP16: {
6493 // Custom lower to ensure the libcall return is passed in an FPR on hard
6494 // float ABIs.
6495 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6496 SDLoc DL(Op);
6497 MakeLibCallOptions CallOptions;
6498 RTLIB::Libcall LC =
6499 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
6500 SDValue Res =
6501 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
6502 if (Subtarget.is64Bit() && !RV64LegalI32)
6503 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
6504 return DAG.getBitcast(MVT::i32, Res);
6505 }
6506 case ISD::FP16_TO_FP: {
6507 // Custom lower to ensure the libcall argument is passed in an FPR on hard
6508 // float ABIs.
6509 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
6510 SDLoc DL(Op);
6511 MakeLibCallOptions CallOptions;
6512 SDValue Arg = Subtarget.is64Bit()
6513 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
6514 Op.getOperand(0))
6515 : DAG.getBitcast(MVT::f32, Op.getOperand(0));
6516 SDValue Res =
6517 makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
6518 .first;
6519 return Res;
6520 }
6521 case ISD::FTRUNC:
6522 case ISD::FCEIL:
6523 case ISD::FFLOOR:
6524 case ISD::FNEARBYINT:
6525 case ISD::FRINT:
6526 case ISD::FROUND:
6527 case ISD::FROUNDEVEN:
6528 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6529 case ISD::LRINT:
6530 case ISD::LLRINT:
6531 return lowerVectorXRINT(Op, DAG, Subtarget);
6532 case ISD::VECREDUCE_ADD:
6533 case ISD::VECREDUCE_UMAX:
6534 case ISD::VECREDUCE_SMAX:
6535 case ISD::VECREDUCE_UMIN:
6536 case ISD::VECREDUCE_SMIN:
6537 return lowerVECREDUCE(Op, DAG);
6538 case ISD::VECREDUCE_AND:
6539 case ISD::VECREDUCE_OR:
6540 case ISD::VECREDUCE_XOR:
6541 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6542 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
6543 return lowerVECREDUCE(Op, DAG);
6544 case ISD::VECREDUCE_FADD:
6545 case ISD::VECREDUCE_SEQ_FADD:
6546 case ISD::VECREDUCE_FMIN:
6547 case ISD::VECREDUCE_FMAX:
6548 case ISD::VECREDUCE_FMAXIMUM:
6549 case ISD::VECREDUCE_FMINIMUM:
6550 return lowerFPVECREDUCE(Op, DAG);
6551 case ISD::VP_REDUCE_ADD:
6552 case ISD::VP_REDUCE_UMAX:
6553 case ISD::VP_REDUCE_SMAX:
6554 case ISD::VP_REDUCE_UMIN:
6555 case ISD::VP_REDUCE_SMIN:
6556 case ISD::VP_REDUCE_FADD:
6557 case ISD::VP_REDUCE_SEQ_FADD:
6558 case ISD::VP_REDUCE_FMIN:
6559 case ISD::VP_REDUCE_FMAX:
6560 if (Op.getOperand(1).getValueType() == MVT::nxv32f16 &&
6561 (Subtarget.hasVInstructionsF16Minimal() &&
6562 !Subtarget.hasVInstructionsF16()))
6563 return SplitVectorReductionOp(Op, DAG);
6564 return lowerVPREDUCE(Op, DAG);
6565 case ISD::VP_REDUCE_AND:
6566 case ISD::VP_REDUCE_OR:
6567 case ISD::VP_REDUCE_XOR:
6568 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
6569 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
6570 return lowerVPREDUCE(Op, DAG);
6571 case ISD::UNDEF: {
6572 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
6573 return convertFromScalableVector(Op.getSimpleValueType(),
6574 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
6575 }
6576 case ISD::INSERT_SUBVECTOR:
6577 return lowerINSERT_SUBVECTOR(Op, DAG);
6578 case ISD::EXTRACT_SUBVECTOR:
6579 return lowerEXTRACT_SUBVECTOR(Op, DAG);
6580 case ISD::VECTOR_DEINTERLEAVE:
6581 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
6582 case ISD::VECTOR_INTERLEAVE:
6583 return lowerVECTOR_INTERLEAVE(Op, DAG);
6584 case ISD::STEP_VECTOR:
6585 return lowerSTEP_VECTOR(Op, DAG);
6586 case ISD::VECTOR_REVERSE:
6587 return lowerVECTOR_REVERSE(Op, DAG);
6588 case ISD::VECTOR_SPLICE:
6589 return lowerVECTOR_SPLICE(Op, DAG);
6590 case ISD::BUILD_VECTOR:
6591 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
6592 case ISD::SPLAT_VECTOR:
6593 if (Op.getValueType().getScalarType() == MVT::f16 &&
6594 (Subtarget.hasVInstructionsF16Minimal() &&
6595 !Subtarget.hasVInstructionsF16())) {
6596 if (Op.getValueType() == MVT::nxv32f16)
6597 return SplitVectorOp(Op, DAG);
6598 SDLoc DL(Op);
6599 SDValue NewScalar =
6600 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6601 SDValue NewSplat = DAG.getNode(
6602 ISD::SPLAT_VECTOR, DL,
6603 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount()),
6604 NewScalar);
6605 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NewSplat,
6606 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6607 }
6608 if (Op.getValueType().getVectorElementType() == MVT::i1)
6609 return lowerVectorMaskSplat(Op, DAG);
6610 return SDValue();
6611 case ISD::VECTOR_SHUFFLE:
6612 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
6613 case ISD::CONCAT_VECTORS: {
6614 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
6615 // better than going through the stack, as the default expansion does.
6616 SDLoc DL(Op);
6617 MVT VT = Op.getSimpleValueType();
6618 MVT ContainerVT = VT;
6619 if (VT.isFixedLengthVector())
6620 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
6621
6622 // Recursively split concat_vectors with more than 2 operands:
6623 //
6624 // concat_vector op1, op2, op3, op4
6625 // ->
6626 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
6627 //
6628 // This reduces the length of the chain of vslideups and allows us to
6629 // perform the vslideups at a smaller LMUL, limited to MF2.
6630 if (Op.getNumOperands() > 2 &&
6631 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
6632 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6634 size_t HalfNumOps = Op.getNumOperands() / 2;
6635 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6636 Op->ops().take_front(HalfNumOps));
6637 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
6638 Op->ops().drop_front(HalfNumOps));
6639 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
6640 }
6641
6642 unsigned NumOpElts =
6643 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
6644 SDValue Vec = DAG.getUNDEF(VT);
6645 for (const auto &OpIdx : enumerate(Op->ops())) {
6646 SDValue SubVec = OpIdx.value();
6647 // Don't insert undef subvectors.
6648 if (SubVec.isUndef())
6649 continue;
6650 Vec =
6651 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
6652 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
6653 }
6654 return Vec;
6655 }
6656 case ISD::LOAD:
6657 if (auto V = expandUnalignedRVVLoad(Op, DAG))
6658 return V;
6659 if (Op.getValueType().isFixedLengthVector())
6660 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
6661 return Op;
6662 case ISD::STORE:
6663 if (auto V = expandUnalignedRVVStore(Op, DAG))
6664 return V;
6665 if (Op.getOperand(1).getValueType().isFixedLengthVector())
6666 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
6667 return Op;
6668 case ISD::MLOAD:
6669 case ISD::VP_LOAD:
6670 return lowerMaskedLoad(Op, DAG);
6671 case ISD::MSTORE:
6672 case ISD::VP_STORE:
6673 return lowerMaskedStore(Op, DAG);
6674 case ISD::SELECT_CC: {
6675 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
6676 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
6677 // into separate SETCC+SELECT just like LegalizeDAG.
6678 SDValue Tmp1 = Op.getOperand(0);
6679 SDValue Tmp2 = Op.getOperand(1);
6680 SDValue True = Op.getOperand(2);
6681 SDValue False = Op.getOperand(3);
6682 EVT VT = Op.getValueType();
6683 SDValue CC = Op.getOperand(4);
6684 EVT CmpVT = Tmp1.getValueType();
6685 EVT CCVT =
6686 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
6687 SDLoc DL(Op);
6688 SDValue Cond =
6689 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
6690 return DAG.getSelect(DL, VT, Cond, True, False);
6691 }
6692 case ISD::SETCC: {
6693 MVT OpVT = Op.getOperand(0).getSimpleValueType();
6694 if (OpVT.isScalarInteger()) {
6695 MVT VT = Op.getSimpleValueType();
6696 SDValue LHS = Op.getOperand(0);
6697 SDValue RHS = Op.getOperand(1);
6698 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6699 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
6700 "Unexpected CondCode");
6701
6702 SDLoc DL(Op);
6703
6704 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
6705 // convert this to the equivalent of (set(u)ge X, C+1) by using
6706 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
6707 // in a register.
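// For example, (setgt X, 5) becomes (xori (slti X, 6), 1): X > 5 is the
// negation of X < 6, and 6 still fits in the 12-bit immediate of slti.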
6708 if (isa<ConstantSDNode>(RHS)) {
6709 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
6710 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
6711 // If this is an unsigned compare and the constant is -1, incrementing
6712 // the constant would change behavior. The result should be false.
6713 if (CCVal == ISD::SETUGT && Imm == -1)
6714 return DAG.getConstant(0, DL, VT);
6715 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
6716 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6717 SDValue SetCC = DAG.getSetCC(
6718 DL, VT, LHS, DAG.getConstant(Imm + 1, DL, OpVT), CCVal);
6719 return DAG.getLogicalNOT(DL, SetCC, VT);
6720 }
6721 }
6722
6723 // Not a constant we could handle, swap the operands and condition code to
6724 // SETLT/SETULT.
6725 CCVal = ISD::getSetCCSwappedOperands(CCVal);
6726 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
6727 }
6728
6729 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6730 (Subtarget.hasVInstructionsF16Minimal() &&
6731 !Subtarget.hasVInstructionsF16()))
6732 return SplitVectorOp(Op, DAG);
6733
6734 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
6735 }
6736 case ISD::ADD:
6737 case ISD::SUB:
6738 case ISD::MUL:
6739 case ISD::MULHS:
6740 case ISD::MULHU:
6741 case ISD::AND:
6742 case ISD::OR:
6743 case ISD::XOR:
6744 case ISD::SDIV:
6745 case ISD::SREM:
6746 case ISD::UDIV:
6747 case ISD::UREM:
6748 case ISD::BSWAP:
6749 case ISD::CTPOP:
6750 return lowerToScalableOp(Op, DAG);
6751 case ISD::SHL:
6752 case ISD::SRA:
6753 case ISD::SRL:
6754 if (Op.getSimpleValueType().isFixedLengthVector())
6755 return lowerToScalableOp(Op, DAG);
6756 // This can be called for an i32 shift amount that needs to be promoted.
6757 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
6758 "Unexpected custom legalisation");
6759 return SDValue();
6760 case ISD::FADD:
6761 case ISD::FSUB:
6762 case ISD::FMUL:
6763 case ISD::FDIV:
6764 case ISD::FNEG:
6765 case ISD::FABS:
6766 case ISD::FSQRT:
6767 case ISD::FMA:
6768 case ISD::FMINNUM:
6769 case ISD::FMAXNUM:
6770 if (Op.getValueType() == MVT::nxv32f16 &&
6771 (Subtarget.hasVInstructionsF16Minimal() &&
6772 !Subtarget.hasVInstructionsF16()))
6773 return SplitVectorOp(Op, DAG);
6774 [[fallthrough]];
6775 case ISD::AVGFLOORU:
6776 case ISD::AVGCEILU:
6777 case ISD::SMIN:
6778 case ISD::SMAX:
6779 case ISD::UMIN:
6780 case ISD::UMAX:
6781 return lowerToScalableOp(Op, DAG);
6782 case ISD::UADDSAT:
6783 case ISD::USUBSAT:
6784 if (!Op.getValueType().isVector())
6785 return lowerUADDSAT_USUBSAT(Op, DAG);
6786 return lowerToScalableOp(Op, DAG);
6787 case ISD::SADDSAT:
6788 case ISD::SSUBSAT:
6789 if (!Op.getValueType().isVector())
6790 return lowerSADDSAT_SSUBSAT(Op, DAG);
6791 return lowerToScalableOp(Op, DAG);
6792 case ISD::ABDS:
6793 case ISD::ABDU: {
6794 SDLoc dl(Op);
6795 EVT VT = Op->getValueType(0);
6796 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
6797 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
6798 bool IsSigned = Op->getOpcode() == ISD::ABDS;
6799
6800 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
6801 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
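// For example, abds(3, 7) = smax(3, 7) - smin(3, 7) = 7 - 3 = 4 = |3 - 7|.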
6802 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
6803 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
6804 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
6805 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
6806 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
6807 }
6808 case ISD::ABS:
6809 case ISD::VP_ABS:
6810 return lowerABS(Op, DAG);
6811 case ISD::CTLZ:
6812 case ISD::CTLZ_ZERO_UNDEF:
6813 case ISD::CTTZ:
6814 case ISD::CTTZ_ZERO_UNDEF:
6815 if (Subtarget.hasStdExtZvbb())
6816 return lowerToScalableOp(Op, DAG);
6817 assert(Op.getOpcode() != ISD::CTTZ);
6818 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6819 case ISD::VSELECT:
6820 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
6821 case ISD::FCOPYSIGN:
6822 if (Op.getValueType() == MVT::nxv32f16 &&
6823 (Subtarget.hasVInstructionsF16Minimal() &&
6824 !Subtarget.hasVInstructionsF16()))
6825 return SplitVectorOp(Op, DAG);
6826 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
6827 case ISD::STRICT_FADD:
6828 case ISD::STRICT_FSUB:
6829 case ISD::STRICT_FMUL:
6830 case ISD::STRICT_FDIV:
6831 case ISD::STRICT_FSQRT:
6832 case ISD::STRICT_FMA:
6833 if (Op.getValueType() == MVT::nxv32f16 &&
6834 (Subtarget.hasVInstructionsF16Minimal() &&
6835 !Subtarget.hasVInstructionsF16()))
6836 return SplitStrictFPVectorOp(Op, DAG);
6837 return lowerToScalableOp(Op, DAG);
6838 case ISD::STRICT_FSETCC:
6839 case ISD::STRICT_FSETCCS:
6840 return lowerVectorStrictFSetcc(Op, DAG);
6841 case ISD::STRICT_FCEIL:
6842 case ISD::STRICT_FRINT:
6843 case ISD::STRICT_FFLOOR:
6844 case ISD::STRICT_FTRUNC:
6845 case ISD::STRICT_FNEARBYINT:
6846 case ISD::STRICT_FROUND:
6847 case ISD::STRICT_FROUNDEVEN:
6848 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6849 case ISD::MGATHER:
6850 case ISD::VP_GATHER:
6851 return lowerMaskedGather(Op, DAG);
6852 case ISD::MSCATTER:
6853 case ISD::VP_SCATTER:
6854 return lowerMaskedScatter(Op, DAG);
6855 case ISD::GET_ROUNDING:
6856 return lowerGET_ROUNDING(Op, DAG);
6857 case ISD::SET_ROUNDING:
6858 return lowerSET_ROUNDING(Op, DAG);
6859 case ISD::EH_DWARF_CFA:
6860 return lowerEH_DWARF_CFA(Op, DAG);
6861 case ISD::VP_SELECT:
6862 case ISD::VP_MERGE:
6863 case ISD::VP_ADD:
6864 case ISD::VP_SUB:
6865 case ISD::VP_MUL:
6866 case ISD::VP_SDIV:
6867 case ISD::VP_UDIV:
6868 case ISD::VP_SREM:
6869 case ISD::VP_UREM:
6870 case ISD::VP_UADDSAT:
6871 case ISD::VP_USUBSAT:
6872 case ISD::VP_SADDSAT:
6873 case ISD::VP_SSUBSAT:
6874 case ISD::VP_LRINT:
6875 case ISD::VP_LLRINT:
6876 return lowerVPOp(Op, DAG);
6877 case ISD::VP_AND:
6878 case ISD::VP_OR:
6879 case ISD::VP_XOR:
6880 return lowerLogicVPOp(Op, DAG);
6881 case ISD::VP_FADD:
6882 case ISD::VP_FSUB:
6883 case ISD::VP_FMUL:
6884 case ISD::VP_FDIV:
6885 case ISD::VP_FNEG:
6886 case ISD::VP_FABS:
6887 case ISD::VP_SQRT:
6888 case ISD::VP_FMA:
6889 case ISD::VP_FMINNUM:
6890 case ISD::VP_FMAXNUM:
6891 case ISD::VP_FCOPYSIGN:
6892 if (Op.getValueType() == MVT::nxv32f16 &&
6893 (Subtarget.hasVInstructionsF16Minimal() &&
6894 !Subtarget.hasVInstructionsF16()))
6895 return SplitVPOp(Op, DAG);
6896 [[fallthrough]];
6897 case ISD::VP_ASHR:
6898 case ISD::VP_LSHR:
6899 case ISD::VP_SHL:
6900 return lowerVPOp(Op, DAG);
6901 case ISD::VP_IS_FPCLASS:
6902 return LowerIS_FPCLASS(Op, DAG);
6903 case ISD::VP_SIGN_EXTEND:
6904 case ISD::VP_ZERO_EXTEND:
6905 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6906 return lowerVPExtMaskOp(Op, DAG);
6907 return lowerVPOp(Op, DAG);
6908 case ISD::VP_TRUNCATE:
6909 return lowerVectorTruncLike(Op, DAG);
6910 case ISD::VP_FP_EXTEND:
6911 case ISD::VP_FP_ROUND:
6912 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6913 case ISD::VP_SINT_TO_FP:
6914 case ISD::VP_UINT_TO_FP:
6915 if (Op.getValueType().isVector() &&
6916 Op.getValueType().getScalarType() == MVT::f16 &&
6917 (Subtarget.hasVInstructionsF16Minimal() &&
6918 !Subtarget.hasVInstructionsF16())) {
6919 if (Op.getValueType() == MVT::nxv32f16)
6920 return SplitVPOp(Op, DAG);
6921 // int -> f32
6922 SDLoc DL(Op);
6923 MVT NVT =
6924 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6925 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6926 // f32 -> f16
6927 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6928 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6929 }
6930 [[fallthrough]];
6931 case ISD::VP_FP_TO_SINT:
6932 case ISD::VP_FP_TO_UINT:
6933 if (SDValue Op1 = Op.getOperand(0);
6934 Op1.getValueType().isVector() &&
6935 Op1.getValueType().getScalarType() == MVT::f16 &&
6936 (Subtarget.hasVInstructionsF16Minimal() &&
6937 !Subtarget.hasVInstructionsF16())) {
6938 if (Op1.getValueType() == MVT::nxv32f16)
6939 return SplitVPOp(Op, DAG);
6940 // f16 -> f32
6941 SDLoc DL(Op);
6942 MVT NVT = MVT::getVectorVT(MVT::f32,
6943 Op1.getValueType().getVectorElementCount());
6944 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6945 // f32 -> int
6946 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6947 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
6948 }
6949 return lowerVPFPIntConvOp(Op, DAG);
6950 case ISD::VP_SETCC:
6951 if (Op.getOperand(0).getSimpleValueType() == MVT::nxv32f16 &&
6952 (Subtarget.hasVInstructionsF16Minimal() &&
6953 !Subtarget.hasVInstructionsF16()))
6954 return SplitVPOp(Op, DAG);
6955 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
6956 return lowerVPSetCCMaskOp(Op, DAG);
6957 [[fallthrough]];
6958 case ISD::VP_SMIN:
6959 case ISD::VP_SMAX:
6960 case ISD::VP_UMIN:
6961 case ISD::VP_UMAX:
6962 case ISD::VP_BITREVERSE:
6963 case ISD::VP_BSWAP:
6964 return lowerVPOp(Op, DAG);
6965 case ISD::VP_CTLZ:
6966 case ISD::VP_CTLZ_ZERO_UNDEF:
6967 if (Subtarget.hasStdExtZvbb())
6968 return lowerVPOp(Op, DAG);
6969 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6970 case ISD::VP_CTTZ:
6971 case ISD::VP_CTTZ_ZERO_UNDEF:
6972 if (Subtarget.hasStdExtZvbb())
6973 return lowerVPOp(Op, DAG);
6974 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
6975 case ISD::VP_CTPOP:
6976 return lowerVPOp(Op, DAG);
6977 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
6978 return lowerVPStridedLoad(Op, DAG);
6979 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
6980 return lowerVPStridedStore(Op, DAG);
6981 case ISD::VP_FCEIL:
6982 case ISD::VP_FFLOOR:
6983 case ISD::VP_FRINT:
6984 case ISD::VP_FNEARBYINT:
6985 case ISD::VP_FROUND:
6986 case ISD::VP_FROUNDEVEN:
6987 case ISD::VP_FROUNDTOZERO:
6988 if (Op.getValueType() == MVT::nxv32f16 &&
6989 (Subtarget.hasVInstructionsF16Minimal() &&
6990 !Subtarget.hasVInstructionsF16()))
6991 return SplitVPOp(Op, DAG);
6992 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
6993 case ISD::VP_FMAXIMUM:
6994 case ISD::VP_FMINIMUM:
6995 if (Op.getValueType() == MVT::nxv32f16 &&
6996 (Subtarget.hasVInstructionsF16Minimal() &&
6997 !Subtarget.hasVInstructionsF16()))
6998 return SplitVPOp(Op, DAG);
6999 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7000 case ISD::EXPERIMENTAL_VP_SPLICE:
7001 return lowerVPSpliceExperimental(Op, DAG);
7002 case ISD::EXPERIMENTAL_VP_REVERSE:
7003 return lowerVPReverseExperimental(Op, DAG);
7004 }
7005}
7006
7007static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7008 SelectionDAG &DAG, unsigned Flags) {
7009 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7010}
7011
7012static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7013 SelectionDAG &DAG, unsigned Flags) {
7014 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7015 Flags);
7016}
7017
7018static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7019 SelectionDAG &DAG, unsigned Flags) {
7020 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7021 N->getOffset(), Flags);
7022}
7023
7024static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7025 SelectionDAG &DAG, unsigned Flags) {
7026 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7027}
7028
7029template <class NodeTy>
7030SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7031 bool IsLocal, bool IsExternWeak) const {
7032 SDLoc DL(N);
7033 EVT Ty = getPointerTy(DAG.getDataLayout());
7034
7035 // When HWASAN is used and tagging of global variables is enabled,
7036 // they should be accessed via the GOT, since the tagged address of a global
7037 // is incompatible with existing code models. This also applies to non-pic
7038 // mode.
7039 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7040 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7041 if (IsLocal && !Subtarget.allowTaggedGlobals())
7042 // Use PC-relative addressing to access the symbol. This generates the
7043 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7044 // %pcrel_lo(auipc)).
7045 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7046
7047 // Use PC-relative addressing to access the GOT for this symbol, then load
7048 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7049 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7050 SDValue Load =
7051 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7052 MachineFunction &MF = DAG.getMachineFunction();
7053 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7054 MachinePointerInfo::getGOT(MF),
7055 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7056 MachineMemOperand::MOInvariant,
7057 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7058 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7059 return Load;
7060 }
7061
7062 switch (getTargetMachine().getCodeModel()) {
7063 default:
7064 report_fatal_error("Unsupported code model for lowering");
7065 case CodeModel::Small: {
7066 // Generate a sequence for accessing addresses within the first 2 GiB of
7067 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
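// In assembly this is roughly: lui rd, %hi(sym); addi rd, rd, %lo(sym).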
7068 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7069 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7070 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7071 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7072 }
7073 case CodeModel::Medium: {
7074 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7075 if (IsExternWeak) {
7076 // An extern weak symbol may be undefined, i.e. have value 0, which may
7077 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7078 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7079 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7080 SDValue Load =
7081 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7082 MachineFunction &MF = DAG.getMachineFunction();
7083 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7084 MachinePointerInfo::getGOT(MF),
7085 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7086 MachineMemOperand::MOInvariant,
7087 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7088 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7089 return Load;
7090 }
7091
7092 // Generate a sequence for accessing addresses within any 2GiB range within
7093 // the address space. This generates the pattern (PseudoLLA sym), which
7094 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7095 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7096 }
7097 }
7098}
7099
7100SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7101 SelectionDAG &DAG) const {
7102 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7103 assert(N->getOffset() == 0 && "unexpected offset in global node");
7104 const GlobalValue *GV = N->getGlobal();
7105 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7106}
7107
7108SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7109 SelectionDAG &DAG) const {
7110 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7111
7112 return getAddr(N, DAG);
7113}
7114
7115SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7116 SelectionDAG &DAG) const {
7117 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7118
7119 return getAddr(N, DAG);
7120}
7121
7122SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7123 SelectionDAG &DAG) const {
7124 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7125
7126 return getAddr(N, DAG);
7127}
7128
7129SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7130 SelectionDAG &DAG,
7131 bool UseGOT) const {
7132 SDLoc DL(N);
7133 EVT Ty = getPointerTy(DAG.getDataLayout());
7134 const GlobalValue *GV = N->getGlobal();
7135 MVT XLenVT = Subtarget.getXLenVT();
7136
7137 if (UseGOT) {
7138 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7139 // load the address from the GOT and add the thread pointer. This generates
7140 // the pattern (PseudoLA_TLS_IE sym), which expands to
7141 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7142 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7143 SDValue Load =
7144 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7145 MachineFunction &MF = DAG.getMachineFunction();
7146 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7147 MachinePointerInfo::getGOT(MF),
7148 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7149 MachineMemOperand::MOInvariant,
7150 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7151 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7152
7153 // Add the thread pointer.
7154 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7155 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
7156 }
7157
7158 // Generate a sequence for accessing the address relative to the thread
7159 // pointer, with the appropriate adjustment for the thread pointer offset.
7160 // This generates the pattern
7161 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
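// In assembly this is roughly:
//   lui  rd, %tprel_hi(sym)
//   add  rd, rd, tp, %tprel_add(sym)
//   addi rd, rd, %tprel_lo(sym)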
7162 SDValue AddrHi =
7163 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
7164 SDValue AddrAdd =
7165 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
7166 SDValue AddrLo =
7167 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
7168
7169 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7170 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
7171 SDValue MNAdd =
7172 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
7173 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
7174}
7175
7176SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
7177 SelectionDAG &DAG) const {
7178 SDLoc DL(N);
7179 EVT Ty = getPointerTy(DAG.getDataLayout());
7180 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
7181 const GlobalValue *GV = N->getGlobal();
7182
7183 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7184 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
7185 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
7186 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7187 SDValue Load =
7188 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
7189
7190 // Prepare argument list to generate call.
7191 ArgListTy Args;
7192 ArgListEntry Entry;
7193 Entry.Node = Load;
7194 Entry.Ty = CallTy;
7195 Args.push_back(Entry);
7196
7197 // Setup call to __tls_get_addr.
7198 TargetLowering::CallLoweringInfo CLI(DAG);
7199 CLI.setDebugLoc(DL)
7200 .setChain(DAG.getEntryNode())
7201 .setLibCallee(CallingConv::C, CallTy,
7202 DAG.getExternalSymbol("__tls_get_addr", Ty),
7203 std::move(Args));
7204
7205 return LowerCallTo(CLI).first;
7206}
7207
7208SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
7209 SelectionDAG &DAG) const {
7210 SDLoc DL(N);
7211 EVT Ty = getPointerTy(DAG.getDataLayout());
7212 const GlobalValue *GV = N->getGlobal();
7213
7214 // Use a PC-relative addressing mode to access the global dynamic GOT address.
7215 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
7216 //
7217 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
7218 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
7219 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
7220 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
7221 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7222 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
7223}
7224
7225SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
7226 SelectionDAG &DAG) const {
7227 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7228 assert(N->getOffset() == 0 && "unexpected offset in global node");
7229
7230 if (DAG.getTarget().useEmulatedTLS())
7231 return LowerToTLSEmulatedModel(N, DAG);
7232
7232
7233 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
7234
7235 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
7236 CallingConv::GHC)
7237 report_fatal_error("In GHC calling convention TLS is not supported");
7238
7239 SDValue Addr;
7240 switch (Model) {
7241 case TLSModel::LocalExec:
7242 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
7243 break;
7244 case TLSModel::InitialExec:
7245 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
7246 break;
7247 case TLSModel::LocalDynamic:
7248 case TLSModel::GeneralDynamic:
7249 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
7250 : getDynamicTLSAddr(N, DAG);
7251 break;
7252 }
7253
7254 return Addr;
7255}
7256
7257// Return true if Val is equal to (setcc LHS, RHS, CC).
7258// Return false if Val is the inverse of (setcc LHS, RHS, CC).
7259// Otherwise, return std::nullopt.
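// For example, with Val = (setcc a, b, setlt): matchSetCC(a, b, setlt, Val)
// returns true, matchSetCC(a, b, setge, Val) returns false, and unrelated
// operands return std::nullopt.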
7260static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
7261 ISD::CondCode CC, SDValue Val) {
7262 assert(Val->getOpcode() == ISD::SETCC);
7263 SDValue LHS2 = Val.getOperand(0);
7264 SDValue RHS2 = Val.getOperand(1);
7265 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
7266
7267 if (LHS == LHS2 && RHS == RHS2) {
7268 if (CC == CC2)
7269 return true;
7270 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7271 return false;
7272 } else if (LHS == RHS2 && RHS == LHS2) {
7273 CC2 = ISD::getSetCCSwappedOperands(CC2);
7274 if (CC == CC2)
7275 return true;
7276 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
7277 return false;
7278 }
7279
7280 return std::nullopt;
7281}
7282
7283static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
7284 const RISCVSubtarget &Subtarget) {
7285 SDValue CondV = N->getOperand(0);
7286 SDValue TrueV = N->getOperand(1);
7287 SDValue FalseV = N->getOperand(2);
7288 MVT VT = N->getSimpleValueType(0);
7289 SDLoc DL(N);
7290
7291 if (!Subtarget.hasConditionalMoveFusion()) {
7292 // (select c, -1, y) -> -c | y
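// (c is 0 or 1 here, so -c is either 0 or all-ones; OR-ing it with y then
// yields y or -1 respectively.)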
7293 if (isAllOnesConstant(TrueV)) {
7294 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7295 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
7296 }
7297 // (select c, y, -1) -> (c-1) | y
7298 if (isAllOnesConstant(FalseV)) {
7299 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7300 DAG.getAllOnesConstant(DL, VT));
7301 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
7302 }
7303
7304 // (select c, 0, y) -> (c-1) & y
7305 if (isNullConstant(TrueV)) {
7306 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
7307 DAG.getAllOnesConstant(DL, VT));
7308 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
7309 }
7310 // (select c, y, 0) -> -c & y
7311 if (isNullConstant(FalseV)) {
7312 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7313 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
7314 }
7315 }
7316
7317 // select c, ~x, x --> xor -c, x
7318 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7319 const APInt &TrueVal = TrueV->getAsAPIntVal();
7320 const APInt &FalseVal = FalseV->getAsAPIntVal();
7321 if (~TrueVal == FalseVal) {
7322 SDValue Neg = DAG.getNegative(CondV, DL, VT);
7323 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
7324 }
7325 }
7326
7327 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
7328 // when both truev and falsev are also setcc.
7329 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
7330 FalseV.getOpcode() == ISD::SETCC) {
7331 SDValue LHS = CondV.getOperand(0);
7332 SDValue RHS = CondV.getOperand(1);
7333 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7334
7335 // (select x, x, y) -> x | y
7336 // (select !x, x, y) -> x & y
7337 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
7338 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
7339 DAG.getFreeze(FalseV));
7340 }
7341 // (select x, y, x) -> x & y
7342 // (select !x, y, x) -> x | y
7343 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
7344 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
7345 DAG.getFreeze(TrueV), FalseV);
7346 }
7347 }
7348
7349 return SDValue();
7350}
7351
7352// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
7353// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
7354// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
7355// being `0` or `-1`. In such cases we can replace `select` with `and`.
7356// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
7357// than `c0`?
7358static SDValue
7359foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
7360 const RISCVSubtarget &Subtarget) {
7361 if (Subtarget.hasShortForwardBranchOpt())
7362 return SDValue();
7363
7364 unsigned SelOpNo = 0;
7365 SDValue Sel = BO->getOperand(0);
7366 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
7367 SelOpNo = 1;
7368 Sel = BO->getOperand(1);
7369 }
7370
7371 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
7372 return SDValue();
7373
7374 unsigned ConstSelOpNo = 1;
7375 unsigned OtherSelOpNo = 2;
7376 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
7377 ConstSelOpNo = 2;
7378 OtherSelOpNo = 1;
7379 }
7380 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
7381 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
7382 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
7383 return SDValue();
7384
7385 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
7386 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
7387 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
7388 return SDValue();
7389
7390 SDLoc DL(Sel);
7391 EVT VT = BO->getValueType(0);
7392
7393 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
7394 if (SelOpNo == 1)
7395 std::swap(NewConstOps[0], NewConstOps[1]);
7396
7397 SDValue NewConstOp =
7398 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
7399 if (!NewConstOp)
7400 return SDValue();
7401
7402 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
7403 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
7404 return SDValue();
7405
7406 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
7407 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
7408 if (SelOpNo == 1)
7409 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
7410 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
7411
7412 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
7413 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
7414 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
7415}
7416
7417SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
7418 SDValue CondV = Op.getOperand(0);
7419 SDValue TrueV = Op.getOperand(1);
7420 SDValue FalseV = Op.getOperand(2);
7421 SDLoc DL(Op);
7422 MVT VT = Op.getSimpleValueType();
7423 MVT XLenVT = Subtarget.getXLenVT();
7424
7425 // Lower vector SELECTs to VSELECTs by splatting the condition.
7426 if (VT.isVector()) {
7427 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
7428 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
7429 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
7430 }
7431
7432 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
7433 // nodes to implement the SELECT. Performing the lowering here allows for
7434 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
7435 // sequence or RISCVISD::SELECT_CC node (branch-based select).
7436 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
7437 VT.isScalarInteger()) {
7438 // (select c, t, 0) -> (czero_eqz t, c)
7439 if (isNullConstant(FalseV))
7440 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
7441 // (select c, 0, f) -> (czero_nez f, c)
7442 if (isNullConstant(TrueV))
7443 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
7444
7445 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
7446 if (TrueV.getOpcode() == ISD::AND &&
7447 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
7448 return DAG.getNode(
7449 ISD::OR, DL, VT, TrueV,
7450 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7451 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
7452 if (FalseV.getOpcode() == ISD::AND &&
7453 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
7454 return DAG.getNode(
7455 ISD::OR, DL, VT, FalseV,
7456 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
7457
7458 // Try some other optimizations before falling back to generic lowering.
7459 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7460 return V;
7461
7462 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
7463 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
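// For example, (select c, 10, 3) in the czero_nez form becomes
// (add (czero_nez (3 - 10), c), 10): the czero_nez yields -7 when c is zero
// and 0 otherwise, so the add produces 3 or 10 as required.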
7464 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
7465 const APInt &TrueVal = TrueV->getAsAPIntVal();
7466 const APInt &FalseVal = FalseV->getAsAPIntVal();
7467 const int TrueValCost = RISCVMatInt::getIntMatCost(
7468 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7469 const int FalseValCost = RISCVMatInt::getIntMatCost(
7470 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
7471 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
7472 SDValue LHSVal = DAG.getConstant(
7473 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
7474 SDValue RHSVal =
7475 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
7476 SDValue CMOV =
7477 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
7478 DL, VT, LHSVal, CondV);
7479 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
7480 }
7481
7482 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
7483 // Unless we have the short forward branch optimization.
7484 if (!Subtarget.hasConditionalMoveFusion())
7485 return DAG.getNode(
7486 ISD::OR, DL, VT,
7487 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
7488 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
7489 }
7490
7491 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
7492 return V;
7493
7494 if (Op.hasOneUse()) {
7495 unsigned UseOpc = Op->use_begin()->getOpcode();
7496 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
7497 SDNode *BinOp = *Op->use_begin();
7498 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
7499 DAG, Subtarget)) {
7500 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
7501 return lowerSELECT(NewSel, DAG);
7502 }
7503 }
7504 }
7505
7506 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
7507 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
7508 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
7509 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
7510 if (FPTV && FPFV) {
7511 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
7512 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
7513 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
7514 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
7515 DAG.getConstant(1, DL, XLenVT));
7516 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
7517 }
7518 }
7519
7520 // If the condition is not an integer SETCC which operates on XLenVT, we need
7521 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
7522 // (select condv, truev, falsev)
7523 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
7524 if (CondV.getOpcode() != ISD::SETCC ||
7525 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
7526 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
7527 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
7528
7529 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
7530
7531 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7532 }
7533
7534 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
7535 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
7536 // advantage of the integer compare+branch instructions. i.e.:
7537 // (select (setcc lhs, rhs, cc), truev, falsev)
7538 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
7539 SDValue LHS = CondV.getOperand(0);
7540 SDValue RHS = CondV.getOperand(1);
7541 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7542
7543 // Special case for a select of 2 constants that have a difference of 1.
7544 // Normally this is done by DAGCombine, but if the select is introduced by
7545 // type legalization or op legalization, we miss it. Restricting to SETLT
7546 // case for now because that is what signed saturating add/sub need.
7547 // FIXME: We don't need the condition to be SETLT or even a SETCC,
7548 // but we would probably want to swap the true/false values if the condition
7549 // is SETGE/SETLE to avoid an XORI.
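// For example, (select (setlt a, b), 5, 4) becomes (add (setlt a, b), 4),
// since the setcc is known to be 0 or 1.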
7550 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7551 CCVal == ISD::SETLT) {
7552 const APInt &TrueVal = TrueV->getAsAPIntVal();
7553 const APInt &FalseVal = FalseV->getAsAPIntVal();
7554 if (TrueVal - 1 == FalseVal)
7555 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
7556 if (TrueVal + 1 == FalseVal)
7557 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
7558 }
7559
7560 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7561 // 1 < x ? x : 1 -> 0 < x ? x : 1
7562 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
7563 RHS == TrueV && LHS == FalseV) {
7564 LHS = DAG.getConstant(0, DL, VT);
7565 // 0 <u x is the same as x != 0.
7566 if (CCVal == ISD::SETULT) {
7567 std::swap(LHS, RHS);
7568 CCVal = ISD::SETNE;
7569 }
7570 }
7571
7572 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
7573 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
7574 RHS == FalseV) {
7575 RHS = DAG.getConstant(0, DL, VT);
7576 }
7577
7578 SDValue TargetCC = DAG.getCondCode(CCVal);
7579
7580 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
7581 // (select (setcc lhs, rhs, CC), constant, falsev)
7582 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
7583 std::swap(TrueV, FalseV);
7584 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
7585 }
7586
7587 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
7588 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
7589}
7590
7591SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
7592 SDValue CondV = Op.getOperand(1);
7593 SDLoc DL(Op);
7594 MVT XLenVT = Subtarget.getXLenVT();
7595
7596 if (CondV.getOpcode() == ISD::SETCC &&
7597 CondV.getOperand(0).getValueType() == XLenVT) {
7598 SDValue LHS = CondV.getOperand(0);
7599 SDValue RHS = CondV.getOperand(1);
7600 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
7601
7602 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7603
7604 SDValue TargetCC = DAG.getCondCode(CCVal);
7605 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7606 LHS, RHS, TargetCC, Op.getOperand(2));
7607 }
7608
7609 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
7610 CondV, DAG.getConstant(0, DL, XLenVT),
7611 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
7612}
7613
7614SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
7615 MachineFunction &MF = DAG.getMachineFunction();
7616 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
7617
7618 SDLoc DL(Op);
7619 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
7620 getPointerTy(MF.getDataLayout()));
7621
7622 // vastart just stores the address of the VarArgsFrameIndex slot into the
7623 // memory location argument.
7624 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7625 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
7626 MachinePointerInfo(SV));
7627}
7628
7629SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
7630 SelectionDAG &DAG) const {
7631 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7632 MachineFunction &MF = DAG.getMachineFunction();
7633 MachineFrameInfo &MFI = MF.getFrameInfo();
7634 MFI.setFrameAddressIsTaken(true);
7635 Register FrameReg = RI.getFrameRegister(MF);
7636 int XLenInBytes = Subtarget.getXLen() / 8;
7637
7638 EVT VT = Op.getValueType();
7639 SDLoc DL(Op);
7640 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
7641 unsigned Depth = Op.getConstantOperandVal(0);
7642 while (Depth--) {
7643 int Offset = -(XLenInBytes * 2);
7644 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
7645 DAG.getIntPtrConstant(Offset, DL));
7646 FrameAddr =
7647 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
7648 }
7649 return FrameAddr;
7650}
7651
7652SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
7653 SelectionDAG &DAG) const {
7654 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
7655 MachineFunction &MF = DAG.getMachineFunction();
7656 MachineFrameInfo &MFI = MF.getFrameInfo();
7657 MFI.setReturnAddressIsTaken(true);
7658 MVT XLenVT = Subtarget.getXLenVT();
7659 int XLenInBytes = Subtarget.getXLen() / 8;
7660
7661 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
7662 return SDValue();
7663
7664 EVT VT = Op.getValueType();
7665 SDLoc DL(Op);
7666 unsigned Depth = Op.getConstantOperandVal(0);
7667 if (Depth) {
7668 int Off = -XLenInBytes;
7669 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
7670 SDValue Offset = DAG.getConstant(Off, DL, VT);
7671 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
7672 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
7673 MachinePointerInfo());
7674 }
7675
7676 // Return the value of the return address register, marking it an implicit
7677 // live-in.
7678 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
7679 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
7680}
7681
7682SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
7683 SelectionDAG &DAG) const {
7684 SDLoc DL(Op);
7685 SDValue Lo = Op.getOperand(0);
7686 SDValue Hi = Op.getOperand(1);
7687 SDValue Shamt = Op.getOperand(2);
7688 EVT VT = Lo.getValueType();
7689
7690 // if Shamt-XLEN < 0: // Shamt < XLEN
7691 // Lo = Lo << Shamt
7692 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
7693 // else:
7694 // Lo = 0
7695 // Hi = Lo << (Shamt-XLEN)
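// For example, on RV32 with Shamt = 40: Shamt - XLEN = 8 >= 0, so Lo becomes
// 0 and Hi becomes Lo << 8.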
7696
7697 SDValue Zero = DAG.getConstant(0, DL, VT);
7698 SDValue One = DAG.getConstant(1, DL, VT);
7699 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7700 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7701 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7702 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7703
7704 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
7705 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
7706 SDValue ShiftRightLo =
7707 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
7708 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
7709 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
7710 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
7711
7712 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7713
7714 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
7715 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7716
7717 SDValue Parts[2] = {Lo, Hi};
7718 return DAG.getMergeValues(Parts, DL);
7719}
7720
7721SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
7722 bool IsSRA) const {
7723 SDLoc DL(Op);
7724 SDValue Lo = Op.getOperand(0);
7725 SDValue Hi = Op.getOperand(1);
7726 SDValue Shamt = Op.getOperand(2);
7727 EVT VT = Lo.getValueType();
7728
7729 // SRA expansion:
7730 // if Shamt-XLEN < 0: // Shamt < XLEN
7731 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7732 // Hi = Hi >>s Shamt
7733 // else:
7734 // Lo = Hi >>s (Shamt-XLEN);
7735 // Hi = Hi >>s (XLEN-1)
7736 //
7737 // SRL expansion:
7738 // if Shamt-XLEN < 0: // Shamt < XLEN
7739 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
7740 // Hi = Hi >>u Shamt
7741 // else:
7742 // Lo = Hi >>u (Shamt-XLEN);
7743 // Hi = 0;
7744
7745 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
7746
7747 SDValue Zero = DAG.getConstant(0, DL, VT);
7748 SDValue One = DAG.getConstant(1, DL, VT);
7749 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
7750 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
7751 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
7752 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
7753
7754 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
7755 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
7756 SDValue ShiftLeftHi =
7757 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
7758 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
7759 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
7760 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
7761 SDValue HiFalse =
7762 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
7763
7764 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
7765
7766 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
7767 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
7768
7769 SDValue Parts[2] = {Lo, Hi};
7770 return DAG.getMergeValues(Parts, DL);
7771}
7772
7773// Lower splats of i1 types to SETCC. For each mask vector type, we have a
7774// legal equivalently-sized i8 type, so we can use that as a go-between.
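// For a non-constant scalar this emits roughly
// (setcc (splat_i8 (and x, 1)), (splat_i8 0), ne).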
7775SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
7776 SelectionDAG &DAG) const {
7777 SDLoc DL(Op);
7778 MVT VT = Op.getSimpleValueType();
7779 SDValue SplatVal = Op.getOperand(0);
7780 // All-zeros or all-ones splats are handled specially.
7781 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
7782 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7783 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
7784 }
7785 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
7786 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
7787 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
7788 }
7789 MVT InterVT = VT.changeVectorElementType(MVT::i8);
7790 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
7791 DAG.getConstant(1, DL, SplatVal.getValueType()));
7792 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
7793 SDValue Zero = DAG.getConstant(0, DL, InterVT);
7794 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
7795}
7796
7797// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
7798// illegal (currently only vXi64 RV32).
7799// FIXME: We could also catch non-constant sign-extended i32 values and lower
7800// them to VMV_V_X_VL.
7801SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
7802 SelectionDAG &DAG) const {
7803 SDLoc DL(Op);
7804 MVT VecVT = Op.getSimpleValueType();
7805 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
7806 "Unexpected SPLAT_VECTOR_PARTS lowering");
7807
7808 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
7809 SDValue Lo = Op.getOperand(0);
7810 SDValue Hi = Op.getOperand(1);
7811
7812 MVT ContainerVT = VecVT;
7813 if (VecVT.isFixedLengthVector())
7814 ContainerVT = getContainerForFixedLengthVector(VecVT);
7815
7816 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7817
7818 SDValue Res =
7819 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
7820
7821 if (VecVT.isFixedLengthVector())
7822 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
7823
7824 return Res;
7825}
7826
7827// Custom-lower extensions from mask vectors by using a vselect either with 1
7828// for zero/any-extension or -1 for sign-extension:
7829// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
7830// Note that any-extension is lowered identically to zero-extension.
7831SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
7832 int64_t ExtTrueVal) const {
7833 SDLoc DL(Op);
7834 MVT VecVT = Op.getSimpleValueType();
7835 SDValue Src = Op.getOperand(0);
7836 // Only custom-lower extensions from mask types
7837 assert(Src.getValueType().isVector() &&
7838 Src.getValueType().getVectorElementType() == MVT::i1);
7839
7840 if (VecVT.isScalableVector()) {
7841 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
7842 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
7843 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
7844 }
7845
7846 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
7847 MVT I1ContainerVT =
7848 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
7849
7850 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
7851
7852 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
7853
7854 MVT XLenVT = Subtarget.getXLenVT();
7855 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7856 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
7857
7858 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7859 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7860 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7861 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
7862 SDValue Select =
7863 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
7864 SplatZero, DAG.getUNDEF(ContainerVT), VL);
7865
7866 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
7867}
7868
7869SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
7870 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
7871 MVT ExtVT = Op.getSimpleValueType();
7872 // Only custom-lower extensions from fixed-length vector types.
7873 if (!ExtVT.isFixedLengthVector())
7874 return Op;
7875 MVT VT = Op.getOperand(0).getSimpleValueType();
7876 // Grab the canonical container type for the extended type. Infer the smaller
7877 // type from that to ensure the same number of vector elements, as we know
7878 // the LMUL will be sufficient to hold the smaller type.
7879 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
7880 // Get the extended container type manually to ensure the same number of
7881 // vector elements between source and dest.
7882 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
7883 ContainerExtVT.getVectorElementCount());
7884
7885 SDValue Op1 =
7886 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
7887
7888 SDLoc DL(Op);
7889 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7890
7891 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
7892
7893 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
7894}
7895
7896// Custom-lower truncations from vectors to mask vectors by using a mask and a
7897// setcc operation:
7898// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
7899SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
7900 SelectionDAG &DAG) const {
7901 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7902 SDLoc DL(Op);
7903 EVT MaskVT = Op.getValueType();
7904 // Only expect to custom-lower truncations to mask types
7905 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
7906 "Unexpected type for vector mask lowering");
7907 SDValue Src = Op.getOperand(0);
7908 MVT VecVT = Src.getSimpleValueType();
7909 SDValue Mask, VL;
7910 if (IsVPTrunc) {
7911 Mask = Op.getOperand(1);
7912 VL = Op.getOperand(2);
7913 }
7914 // If this is a fixed vector, we need to convert it to a scalable vector.
7915 MVT ContainerVT = VecVT;
7916
7917 if (VecVT.isFixedLengthVector()) {
7918 ContainerVT = getContainerForFixedLengthVector(VecVT);
7919 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7920 if (IsVPTrunc) {
7921 MVT MaskContainerVT =
7922 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7923 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7924 }
7925 }
7926
7927 if (!IsVPTrunc) {
7928 std::tie(Mask, VL) =
7929 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
7930 }
7931
7932 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
7933 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7934
7935 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7936 DAG.getUNDEF(ContainerVT), SplatOne, VL);
7937 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
7938 DAG.getUNDEF(ContainerVT), SplatZero, VL);
7939
7940 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
7941 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
7942 DAG.getUNDEF(ContainerVT), Mask, VL);
7943 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
7944 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
7945 DAG.getUNDEF(MaskContainerVT), Mask, VL});
7946 if (MaskVT.isFixedLengthVector())
7947 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
7948 return Trunc;
7949}
7950
7951SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
7952 SelectionDAG &DAG) const {
7953 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
7954 SDLoc DL(Op);
7955
7956 MVT VT = Op.getSimpleValueType();
7957 // Only custom-lower vector truncates
7958 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
7959
7960 // Truncates to mask types are handled differently
7961 if (VT.getVectorElementType() == MVT::i1)
7962 return lowerVectorMaskTruncLike(Op, DAG);
7963
7964 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
7965 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
7966 // truncate by one power of two at a time.
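// For example, truncating i64 elements to i8 emits three narrowing steps:
// i64 -> i32 -> i16 -> i8.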
7967 MVT DstEltVT = VT.getVectorElementType();
7968
7969 SDValue Src = Op.getOperand(0);
7970 MVT SrcVT = Src.getSimpleValueType();
7971 MVT SrcEltVT = SrcVT.getVectorElementType();
7972
7973 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
7974 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
7975 "Unexpected vector truncate lowering");
7976
7977 MVT ContainerVT = SrcVT;
7978 SDValue Mask, VL;
7979 if (IsVPTrunc) {
7980 Mask = Op.getOperand(1);
7981 VL = Op.getOperand(2);
7982 }
7983 if (SrcVT.isFixedLengthVector()) {
7984 ContainerVT = getContainerForFixedLengthVector(SrcVT);
7985 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
7986 if (IsVPTrunc) {
7987 MVT MaskVT = getMaskTypeFor(ContainerVT);
7988 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
7989 }
7990 }
7991
7992 SDValue Result = Src;
7993 if (!IsVPTrunc) {
7994 std::tie(Mask, VL) =
7995 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
7996 }
7997
7998 LLVMContext &Context = *DAG.getContext();
7999 const ElementCount Count = ContainerVT.getVectorElementCount();
8000 do {
8001 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8002 EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
8003 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
8004 Mask, VL);
8005 } while (SrcEltVT != DstEltVT);
8006
8007 if (SrcVT.isFixedLengthVector())
8008 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8009
8010 return Result;
8011}
8012
8013SDValue
8014RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8015 SelectionDAG &DAG) const {
8016 SDLoc DL(Op);
8017 SDValue Chain = Op.getOperand(0);
8018 SDValue Src = Op.getOperand(1);
8019 MVT VT = Op.getSimpleValueType();
8020 MVT SrcVT = Src.getSimpleValueType();
8021 MVT ContainerVT = VT;
8022 if (VT.isFixedLengthVector()) {
8023 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8024 ContainerVT =
8025 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8026 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8027 }
8028
8029 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8030
8031 // RVV can only widen/truncate fp to types double/half the size of the source.
8032 if ((VT.getVectorElementType() == MVT::f64 &&
8033 SrcVT.getVectorElementType() == MVT::f16) ||
8034 (VT.getVectorElementType() == MVT::f16 &&
8035 SrcVT.getVectorElementType() == MVT::f64)) {
8036 // For double rounding, the intermediate rounding should be round-to-odd.
8037 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8038 ? RISCVISD::STRICT_FP_EXTEND_VL
8039 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8040 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8041 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8042 Chain, Src, Mask, VL);
8043 Chain = Src.getValue(1);
8044 }
8045
8046 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8047 ? RISCVISD::STRICT_FP_EXTEND_VL
8048 : RISCVISD::STRICT_FP_ROUND_VL;
8049 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8050 Chain, Src, Mask, VL);
8051 if (VT.isFixedLengthVector()) {
8052 // StrictFP operations have two result values. Their lowered result should
8053 // have the same result count.
8054 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8055 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8056 }
8057 return Res;
8058}
8059
8060SDValue
8061RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8062 SelectionDAG &DAG) const {
8063 bool IsVP =
8064 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8065 bool IsExtend =
8066 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8067 // RVV can only truncate fp to types half the size of the source. We
8068 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8069 // conversion instruction.
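// Rounding to odd in the f64 -> f32 step preserves the sticky information
// that would otherwise be lost, avoiding double-rounding errors, so the
// two-step conversion matches a single f64 -> f16 rounding.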
8070 SDLoc DL(Op);
8071 MVT VT = Op.getSimpleValueType();
8072
8073 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8074
8075 SDValue Src = Op.getOperand(0);
8076 MVT SrcVT = Src.getSimpleValueType();
8077
8078 bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8079 SrcVT.getVectorElementType() != MVT::f16);
8080 bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
8081 SrcVT.getVectorElementType() != MVT::f64);
8082
8083 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8084
8085 // Prepare any fixed-length vector operands.
8086 MVT ContainerVT = VT;
8087 SDValue Mask, VL;
8088 if (IsVP) {
8089 Mask = Op.getOperand(1);
8090 VL = Op.getOperand(2);
8091 }
8092 if (VT.isFixedLengthVector()) {
8093 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8094 ContainerVT =
8095 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8096 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8097 if (IsVP) {
8098 MVT MaskVT = getMaskTypeFor(ContainerVT);
8099 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8100 }
8101 }
8102
8103 if (!IsVP)
8104 std::tie(Mask, VL) =
8105 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8106
8107 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8108
8109 if (IsDirectConv) {
8110 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8111 if (VT.isFixedLengthVector())
8112 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8113 return Src;
8114 }
8115
8116 unsigned InterConvOpc =
8117 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8118
8119 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8120 SDValue IntermediateConv =
8121 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8122 SDValue Result =
8123 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8124 if (VT.isFixedLengthVector())
8125 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8126 return Result;
8127}
8128
8129// Given a scalable vector type and an index into it, returns the type for the
8130// smallest subvector that the index fits in. This can be used to reduce LMUL
8131// for operations like vslidedown.
8132//
8133// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8134static std::optional<MVT>
8135getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
8136 const RISCVSubtarget &Subtarget) {
8137 assert(VecVT.isScalableVector());
8138 const unsigned EltSize = VecVT.getScalarSizeInBits();
8139 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
8140 const unsigned MinVLMAX = VectorBitsMin / EltSize;
8141 MVT SmallerVT;
8142 if (MaxIdx < MinVLMAX)
8143 SmallerVT = getLMUL1VT(VecVT);
8144 else if (MaxIdx < MinVLMAX * 2)
8145 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
8146 else if (MaxIdx < MinVLMAX * 4)
8147 SmallerVT = getLMUL1VT(VecVT)
8148 .getDoubleNumVectorElementsVT()
8149 .getDoubleNumVectorElementsVT();
8150 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
8151 return std::nullopt;
8152 return SmallerVT;
8153}
8154
8155// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
8156// first position of a vector, and that vector is slid up to the insert index.
8157// By limiting the active vector length to index+1 and merging with the
8158// original vector (with an undisturbed tail policy for elements >= VL), we
8159// achieve the desired result of leaving all elements untouched except the one
8160// at VL-1, which is replaced with the desired value.
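// For example, inserting at index 3 scalar-inserts the value into element 0
// of a temporary and then does a vslideup with offset 3 and VL = 4, so only
// element 3 of the destination is overwritten.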
8161SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
8162 SelectionDAG &DAG) const {
8163 SDLoc DL(Op);
8164 MVT VecVT = Op.getSimpleValueType();
8165 SDValue Vec = Op.getOperand(0);
8166 SDValue Val = Op.getOperand(1);
8167 SDValue Idx = Op.getOperand(2);
8168
8169 if (VecVT.getVectorElementType() == MVT::i1) {
8170 // FIXME: For now we just promote to an i8 vector and insert into that,
8171 // but this is probably not optimal.
8172 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8173 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8174 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
8175 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
8176 }
8177
8178 MVT ContainerVT = VecVT;
8179 // If the operand is a fixed-length vector, convert to a scalable one.
8180 if (VecVT.isFixedLengthVector()) {
8181 ContainerVT = getContainerForFixedLengthVector(VecVT);
8182 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8183 }
8184
8185 // If we know the index we're going to insert at, we can shrink Vec so that
8186 // we're performing the scalar inserts and slideup on a smaller LMUL.
8187 MVT OrigContainerVT = ContainerVT;
8188 SDValue OrigVec = Vec;
8189 SDValue AlignedIdx;
8190 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
8191 const unsigned OrigIdx = IdxC->getZExtValue();
8192 // Do we know an upper bound on LMUL?
8193 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
8194 DL, DAG, Subtarget)) {
8195 ContainerVT = *ShrunkVT;
8196 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
8197 }
8198
8199 // If we're compiling for an exact VLEN value, we can always perform
8200 // the insert in m1 as we can determine the register corresponding to
8201 // the index in the register group.
8202 const MVT M1VT = getLMUL1VT(ContainerVT);
8203 if (auto VLEN = Subtarget.getRealVLen();
8204 VLEN && ContainerVT.bitsGT(M1VT)) {
8205 EVT ElemVT = VecVT.getVectorElementType();
8206 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
8207 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8208 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8209 unsigned ExtractIdx =
8210 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8211 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
8212 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8213 ContainerVT = M1VT;
8214 }
8215
8216 if (AlignedIdx)
8217 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8218 AlignedIdx);
8219 }
8220
8221 MVT XLenVT = Subtarget.getXLenVT();
8222
8223 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
8224 // Even i64-element vectors on RV32 can be lowered without scalar
8225 // legalization if the most-significant 32 bits of the value are not affected
8226 // by the sign-extension of the lower 32 bits.
8227 // TODO: We could also catch sign extensions of a 32-bit value.
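// Illustration: on RV32, an i64 constant such as 0xFFFFFFFF80000000 is the
// sign extension of the 32-bit value 0x80000000 and can take this path with a
// single 32-bit scalar, whereas 0x0000000080000000 is not and falls through
// to the split lo/hi handling below.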
8228 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
8229 const auto *CVal = cast<ConstantSDNode>(Val);
8230 if (isInt<32>(CVal->getSExtValue())) {
8231 IsLegalInsert = true;
8232 Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
8233 }
8234 }
8235
8236 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8237
8238 SDValue ValInVec;
8239
8240 if (IsLegalInsert) {
8241 unsigned Opc =
8242 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
8243 if (isNullConstant(Idx)) {
8244 if (!VecVT.isFloatingPoint())
8245 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
8246 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
8247
8248 if (AlignedIdx)
8249 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8250 Vec, AlignedIdx);
8251 if (!VecVT.isFixedLengthVector())
8252 return Vec;
8253 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
8254 }
8255 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
8256 } else {
8257 // On RV32, i64-element vectors must be specially handled to place the
8258 // value at element 0, by using two vslide1down instructions in sequence on
8259 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
8260 // this.
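// Sketch of the idea (register names illustrative): with VL limited to 2 at
// SEW=32, the i64 value held in a1:a0 ends up as element 0 of the i64 view of
// the reinterpreted vector:
//   vslide1down.vx v8, v8, a0   ; lo word enters at the top (element VL-1)
//   vslide1down.vx v8, v8, a1   ; hi word enters, lo word shifts to element 0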
8261 SDValue ValLo, ValHi;
8262 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
8263 MVT I32ContainerVT =
8264 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
8265 SDValue I32Mask =
8266 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
8267 // Limit the active VL to two.
8268 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
8269 // If the Idx is 0 we can insert directly into the vector.
8270 if (isNullConstant(Idx)) {
8271 // First slide in the lo value, then the hi value above it. We use slide1down
8272 // to avoid the register group overlap constraint of vslide1up.
8273 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8274 Vec, Vec, ValLo, I32Mask, InsertI64VL);
8275 // If the source vector is undef don't pass along the tail elements from
8276 // the previous slide1down.
8277 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
8278 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8279 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
8280 // Bitcast back to the right container type.
8281 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8282
8283 if (AlignedIdx)
8284 ValInVec =
8285 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8286 ValInVec, AlignedIdx);
8287 if (!VecVT.isFixedLengthVector())
8288 return ValInVec;
8289 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
8290 }
8291
8292 // First slide in the lo value, then the hi value above it. We use slide1down
8293 // to avoid the register group overlap constraint of vslide1up.
8294 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8295 DAG.getUNDEF(I32ContainerVT),
8296 DAG.getUNDEF(I32ContainerVT), ValLo,
8297 I32Mask, InsertI64VL);
8298 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
8299 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
8300 I32Mask, InsertI64VL);
8301 // Bitcast back to the right container type.
8302 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
8303 }
8304
8305 // Now that the value is in a vector, slide it into position.
8306 SDValue InsertVL =
8307 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
8308
8309 // Use tail agnostic policy if Idx is the last index of Vec.
8310 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
8311 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
8312 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
8313 Policy = RISCVII::TAIL_AGNOSTIC;
8314 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
8315 Idx, Mask, InsertVL, Policy);
8316
8317 if (AlignedIdx)
8318 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
8319 Slideup, AlignedIdx);
8320 if (!VecVT.isFixedLengthVector())
8321 return Slideup;
8322 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
8323}
8324
8325// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
8326// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
8327// types this is done using VMV_X_S to allow us to glean information about the
8328// sign bits of the result.
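// Rough shape of the output for an integer element (assuming the index is
// non-zero and already narrowed to a suitable LMUL by the code below):
//   vsetivli zero, 1, e32, m1, ta, ma
//   vslidedown.vx v8, v8, a0
//   vmv.x.s a0, v8
// Floating-point elements end the sequence with vfmv.f.s via TableGen
// patterns instead of vmv.x.s.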
8329SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
8330 SelectionDAG &DAG) const {
8331 SDLoc DL(Op);
8332 SDValue Idx = Op.getOperand(1);
8333 SDValue Vec = Op.getOperand(0);
8334 EVT EltVT = Op.getValueType();
8335 MVT VecVT = Vec.getSimpleValueType();
8336 MVT XLenVT = Subtarget.getXLenVT();
8337
8338 if (VecVT.getVectorElementType() == MVT::i1) {
8339 // Use vfirst.m to extract the first bit.
8340 if (isNullConstant(Idx)) {
8341 MVT ContainerVT = VecVT;
8342 if (VecVT.isFixedLengthVector()) {
8343 ContainerVT = getContainerForFixedLengthVector(VecVT);
8344 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8345 }
8346 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8347 SDValue Vfirst =
8348 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
8349 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
8350 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
8351 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8352 }
8353 if (VecVT.isFixedLengthVector()) {
8354 unsigned NumElts = VecVT.getVectorNumElements();
8355 if (NumElts >= 8) {
8356 MVT WideEltVT;
8357 unsigned WidenVecLen;
8358 SDValue ExtractElementIdx;
8359 SDValue ExtractBitIdx;
8360 unsigned MaxEEW = Subtarget.getELen();
8361 MVT LargestEltVT = MVT::getIntegerVT(
8362 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
8363 if (NumElts <= LargestEltVT.getSizeInBits()) {
8364 assert(isPowerOf2_32(NumElts) &&
8365 "the number of elements should be power of 2");
8366 WideEltVT = MVT::getIntegerVT(NumElts);
8367 WidenVecLen = 1;
8368 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
8369 ExtractBitIdx = Idx;
8370 } else {
8371 WideEltVT = LargestEltVT;
8372 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
8373 // extract element index = index / element width
8374 ExtractElementIdx = DAG.getNode(
8375 ISD::SRL, DL, XLenVT, Idx,
8376 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
8377 // mask bit index = index % element width
8378 ExtractBitIdx = DAG.getNode(
8379 ISD::AND, DL, XLenVT, Idx,
8380 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
8381 }
8382 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
8383 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
8384 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
8385 Vec, ExtractElementIdx);
8386 // Extract the bit from GPR.
8387 SDValue ShiftRight =
8388 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
8389 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
8390 DAG.getConstant(1, DL, XLenVT));
8391 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
8392 }
8393 }
8394 // Otherwise, promote to an i8 vector and extract from that.
8395 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
8396 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
8397 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
8398 }
8399
8400 // If this is a fixed vector, we need to convert it to a scalable vector.
8401 MVT ContainerVT = VecVT;
8402 if (VecVT.isFixedLengthVector()) {
8403 ContainerVT = getContainerForFixedLengthVector(VecVT);
8404 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
8405 }
8406
8407 // If we're compiling for an exact VLEN value and we have a known
8408 // constant index, we can always perform the extract in m1 (or
8409 // smaller) as we can determine the register corresponding to
8410 // the index in the register group.
8411 const auto VLen = Subtarget.getRealVLen();
8412 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
8413 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
8414 MVT M1VT = getLMUL1VT(ContainerVT);
8415 unsigned OrigIdx = IdxC->getZExtValue();
8416 EVT ElemVT = VecVT.getVectorElementType();
8417 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
8418 unsigned RemIdx = OrigIdx % ElemsPerVReg;
8419 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
8420 unsigned ExtractIdx =
8421 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
8422 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
8423 DAG.getVectorIdxConstant(ExtractIdx, DL));
8424 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
8425 ContainerVT = M1VT;
8426 }
8427
8428 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
8429 // contains our index.
8430 std::optional<uint64_t> MaxIdx;
8431 if (VecVT.isFixedLengthVector())
8432 MaxIdx = VecVT.getVectorNumElements() - 1;
8433 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
8434 MaxIdx = IdxC->getZExtValue();
8435 if (MaxIdx) {
8436 if (auto SmallerVT =
8437 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
8438 ContainerVT = *SmallerVT;
8439 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
8440 DAG.getConstant(0, DL, XLenVT));
8441 }
8442 }
8443
8444 // If after narrowing, the required slide is still greater than LMUL2,
8445 // fallback to generic expansion and go through the stack. This is done
8446 // for a subtle reason: extracting *all* elements out of a vector is
8447 // widely expected to be linear in vector size, but because vslidedown
8448 // is linear in LMUL, performing N extracts using vslidedown becomes
8449 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
8450 // seems to have the same problem (the store is linear in LMUL), but the
8451 // generic expansion *memoizes* the store, and thus for many extracts of
8452 // the same vector we end up with one store and a bunch of loads.
8453 // TODO: We don't have the same code for insert_vector_elt because we
8454 // have BUILD_VECTOR and handle the degenerate case there. Should we
8455 // consider adding an inverse BUILD_VECTOR node?
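// As an order-of-magnitude illustration of the comment above: extracting all
// 64 elements of a v64i32 held at LMUL=8 via vslidedown pays an LMUL=8 cost
// per extract (roughly 64 * 8 vector-register moves of work), while one
// LMUL=8 store plus 64 scalar loads through the stack stays linear in the
// vector size.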
8456 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
8457 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
8458 return SDValue();
8459
8460 // If the index is 0, the vector is already in the right position.
8461 if (!isNullConstant(Idx)) {
8462 // Use a VL of 1 to avoid processing more elements than we need.
8463 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
8464 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
8465 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
8466 }
8467
8468 if (!EltVT.isInteger()) {
8469 // Floating-point extracts are handled in TableGen.
8470 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
8471 DAG.getVectorIdxConstant(0, DL));
8472 }
8473
8474 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
8475 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
8476}
8477
8478// Some RVV intrinsics may claim that they want an integer operand to be
8479// promoted or expanded.
8480static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
8481 const RISCVSubtarget &Subtarget) {
8482 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
8483 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
8484 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
8485 "Unexpected opcode");
8486
8487 if (!Subtarget.hasVInstructions())
8488 return SDValue();
8489
8490 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8491 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8492 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8493
8494 SDLoc DL(Op);
8495
8496 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8497 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8498 if (!II || !II->hasScalarOperand())
8499 return SDValue();
8500
8501 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
8502 assert(SplatOp < Op.getNumOperands());
8503
8504 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
8505 SDValue &ScalarOp = Operands[SplatOp];
8506 MVT OpVT = ScalarOp.getSimpleValueType();
8507 MVT XLenVT = Subtarget.getXLenVT();
8508
8509 // If this isn't a scalar, or its type is XLenVT we're done.
8510 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8511 return SDValue();
8512
8513 // Simplest case is that the operand needs to be promoted to XLenVT.
8514 if (OpVT.bitsLT(XLenVT)) {
8515 // If the operand is a constant, sign extend to increase our chances
8516 // of being able to use a .vi instruction. ANY_EXTEND would become a
8517 // zero extend and the simm5 check in isel would fail.
8518 // FIXME: Should we ignore the upper bits in isel instead?
8519 unsigned ExtOpc =
8520 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8521 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8522 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8523 }
8524
8525 // Use the previous operand to get the vXi64 VT. The result might be a mask
8526 // VT for compares. Using the previous operand assumes that the previous
8527 // operand will never have a smaller element size than a scalar operand and
8528 // that a widening operation never uses SEW=64.
8529 // NOTE: If this fails the below assert, we can probably just find the
8530 // element count from any operand or result and use it to construct the VT.
8531 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
8532 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
8533
8534 // The more complex case is when the scalar is larger than XLenVT.
8535 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
8536 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
8537
8538 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
8539 // instruction to sign-extend since SEW>XLEN.
8540 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
8541 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
8542 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8543 }
8544
8545 switch (IntNo) {
8546 case Intrinsic::riscv_vslide1up:
8547 case Intrinsic::riscv_vslide1down:
8548 case Intrinsic::riscv_vslide1up_mask:
8549 case Intrinsic::riscv_vslide1down_mask: {
8550 // We need to special case these when the scalar is larger than XLen.
8551 unsigned NumOps = Op.getNumOperands();
8552 bool IsMasked = NumOps == 7;
8553
8554 // Convert the vector source to the equivalent nxvXi32 vector.
8555 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
8556 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
8557 SDValue ScalarLo, ScalarHi;
8558 std::tie(ScalarLo, ScalarHi) =
8559 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
8560
8561 // Double the VL since we halved SEW.
8562 SDValue AVL = getVLOperand(Op);
8563 SDValue I32VL;
8564
8565 // Optimize for constant AVL
8566 if (isa<ConstantSDNode>(AVL)) {
8567 const auto [MinVLMAX, MaxVLMAX] =
8568 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
8569
8570 uint64_t AVLInt = AVL->getAsZExtVal();
8571 if (AVLInt <= MinVLMAX) {
8572 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
8573 } else if (AVLInt >= 2 * MaxVLMAX) {
8574 // Just set vl to VLMAX in this situation
8575 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
8576 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8577 unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
8578 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8579 SDValue SETVLMAX = DAG.getTargetConstant(
8580 Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
8581 I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
8582 LMUL);
8583 } else {
8584 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
8585 // is related to the hardware implementation.
8586 // So let the following code handle it.
8587 }
8588 }
8589 if (!I32VL) {
8590 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
8591 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
8592 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
8593 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
8594 SDValue SETVL =
8595 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
8596 // Use the vsetvli instruction to get the actual vector length used, which
8597 // is related to the hardware implementation.
8598 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
8599 SEW, LMUL);
8600 I32VL =
8601 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
8602 }
8603
8604 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
8605
8606 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
8607 // instructions.
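// Illustrative sequence (register names and exact vsetvli are placeholders;
// I32VL is computed above) for the vslide1up case:
//   vsetvli zero, t0, e32, m2, ta, ma   ; t0 = doubled VL at SEW=32
//   vslide1up.vx v12, v8, a1            ; hi word first
//   vslide1up.vx v8, v12, a0            ; then lo word; pair forms element 0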
8608 SDValue Passthru;
8609 if (IsMasked)
8610 Passthru = DAG.getUNDEF(I32VT);
8611 else
8612 Passthru = DAG.getBitcast(I32VT, Operands[1]);
8613
8614 if (IntNo == Intrinsic::riscv_vslide1up ||
8615 IntNo == Intrinsic::riscv_vslide1up_mask) {
8616 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8617 ScalarHi, I32Mask, I32VL);
8618 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
8619 ScalarLo, I32Mask, I32VL);
8620 } else {
8621 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8622 ScalarLo, I32Mask, I32VL);
8623 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
8624 ScalarHi, I32Mask, I32VL);
8625 }
8626
8627 // Convert back to nxvXi64.
8628 Vec = DAG.getBitcast(VT, Vec);
8629
8630 if (!IsMasked)
8631 return Vec;
8632 // Apply mask after the operation.
8633 SDValue Mask = Operands[NumOps - 3];
8634 SDValue MaskedOff = Operands[1];
8635 // Assume Policy operand is the last operand.
8636 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
8637 // We don't need to select maskedoff if it's undef.
8638 if (MaskedOff.isUndef())
8639 return Vec;
8640 // TAMU
8641 if (Policy == RISCVII::TAIL_AGNOSTIC)
8642 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8643 DAG.getUNDEF(VT), AVL);
8644 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
8645 // It's fine because vmerge does not care about the mask policy.
8646 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
8647 MaskedOff, AVL);
8648 }
8649 }
8650
8651 // We need to convert the scalar to a splat vector.
8652 SDValue VL = getVLOperand(Op);
8653 assert(VL.getValueType() == XLenVT);
8654 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
8655 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
8656}
8657
8658// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
8659// scalable vector llvm.get.vector.length for now.
8660//
8661// We need to convert from a scalable VF to a vsetvli with VLMax equal to
8662// (vscale * VF). The vscale and VF are independent of element width. We use
8663// SEW=8 for the vsetvli because it is the only element width that supports all
8664// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
8665// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
8666// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
8667// SEW and LMUL are better for the surrounding vector instructions.
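// Worked example of the LMUL choice (illustrative numbers): for
// llvm.experimental.get.vector.length(%cnt, i32 32, i1 true), VF = 32 and
// LMul1VF = 64 / 8 = 8, so LMulVal = 4 and the result is roughly
//   vsetvli rd, rs1, e8, m4, ta, ma
// whose VLMAX is (VLEN / 8) * 4 = vscale * 32, matching vscale * VF.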
8668static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
8669 const RISCVSubtarget &Subtarget) {
8670 MVT XLenVT = Subtarget.getXLenVT();
8671
8672 // The smallest LMUL is only valid for the smallest element width.
8673 const unsigned ElementWidth = 8;
8674
8675 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
8676 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
8677 // We don't support VF==1 with ELEN==32.
8678 [[maybe_unused]] unsigned MinVF =
8679 RISCV::RVVBitsPerBlock / Subtarget.getELen();
8680
8681 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
8682 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
8683 "Unexpected VF");
8684
8685 bool Fractional = VF < LMul1VF;
8686 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
8687 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
8688 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
8689
8690 SDLoc DL(N);
8691
8692 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
8693 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
8694
8695 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
8696
8697 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
8698 SDValue Res =
8699 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
8700 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
8701}
8702
8703static inline void promoteVCIXScalar(const SDValue &Op,
8704 SmallVectorImpl<SDValue> &Operands,
8705 SelectionDAG &DAG) {
8706 const RISCVSubtarget &Subtarget =
8707 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8708
8709 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
8710 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
8711 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
8712 SDLoc DL(Op);
8713
8714 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
8715 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
8716 if (!II || !II->hasScalarOperand())
8717 return;
8718
8719 unsigned SplatOp = II->ScalarOperand + 1;
8720 assert(SplatOp < Op.getNumOperands());
8721
8722 SDValue &ScalarOp = Operands[SplatOp];
8723 MVT OpVT = ScalarOp.getSimpleValueType();
8724 MVT XLenVT = Subtarget.getXLenVT();
8725
8726 // The code below is partially copied from lowerVectorIntrinsicScalars.
8727 // If this isn't a scalar, or its type is XLenVT we're done.
8728 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
8729 return;
8730
8731 // Manually emit promote operation for scalar operation.
8732 if (OpVT.bitsLT(XLenVT)) {
8733 unsigned ExtOpc =
8734 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
8735 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
8736 }
8737
8738 return;
8739}
8740
8741static void processVCIXOperands(SDValue &OrigOp,
8742 SmallVectorImpl<SDValue> &Operands,
8743 SelectionDAG &DAG) {
8744 promoteVCIXScalar(OrigOp, Operands, DAG);
8745 const RISCVSubtarget &Subtarget =
8746 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
8747 for (SDValue &V : Operands) {
8748 EVT ValType = V.getValueType();
8749 if (ValType.isVector() && ValType.isFloatingPoint()) {
8750 MVT InterimIVT =
8751 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
8752 ValType.getVectorElementCount());
8753 V = DAG.getBitcast(InterimIVT, V);
8754 }
8755 if (ValType.isFixedLengthVector()) {
8756 MVT OpContainerVT = getContainerForFixedLengthVector(
8757 DAG, V.getSimpleValueType(), Subtarget);
8758 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
8759 }
8760 }
8761}
8762
8763// LMUL * VLEN should be greater than or equal to EGS * SEW
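// For example (numbers for illustration): with a minimum VLEN of 128, a
// nxv4i32 operand gives 128 * 128 / 64 = 256 >= 4 * 32, so an EGS=4 group at
// SEW=32 is accepted, while an EGS=4 group at SEW=64 needs either an LMUL>=2
// type such as nxv2i64 or Zvl256b.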
8764static inline bool isValidEGW(int EGS, EVT VT,
8765 const RISCVSubtarget &Subtarget) {
8766 return (Subtarget.getRealMinVLen() *
8767 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
8768 EGS * VT.getScalarSizeInBits();
8769}
8770
8771SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
8772 SelectionDAG &DAG) const {
8773 unsigned IntNo = Op.getConstantOperandVal(0);
8774 SDLoc DL(Op);
8775 MVT XLenVT = Subtarget.getXLenVT();
8776
8777 switch (IntNo) {
8778 default:
8779 break; // Don't custom lower most intrinsics.
8780 case Intrinsic::thread_pointer: {
8781 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8782 return DAG.getRegister(RISCV::X4, PtrVT);
8783 }
8784 case Intrinsic::riscv_orc_b:
8785 case Intrinsic::riscv_brev8:
8786 case Intrinsic::riscv_sha256sig0:
8787 case Intrinsic::riscv_sha256sig1:
8788 case Intrinsic::riscv_sha256sum0:
8789 case Intrinsic::riscv_sha256sum1:
8790 case Intrinsic::riscv_sm3p0:
8791 case Intrinsic::riscv_sm3p1: {
8792 unsigned Opc;
8793 switch (IntNo) {
8794 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
8795 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
8796 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
8797 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
8798 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
8799 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
8800 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
8801 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
8802 }
8803
8804 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8805 SDValue NewOp =
8806 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8807 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
8808 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8809 }
8810
8811 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8812 }
8813 case Intrinsic::riscv_sm4ks:
8814 case Intrinsic::riscv_sm4ed: {
8815 unsigned Opc =
8816 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
8817
8818 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8819 SDValue NewOp0 =
8820 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8821 SDValue NewOp1 =
8822 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8823 SDValue Res =
8824 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, Op.getOperand(3));
8825 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8826 }
8827
8828 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
8829 Op.getOperand(3));
8830 }
8831 case Intrinsic::riscv_zip:
8832 case Intrinsic::riscv_unzip: {
8833 unsigned Opc =
8834 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
8835 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
8836 }
8837 case Intrinsic::riscv_mopr: {
8838 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8839 SDValue NewOp =
8840 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8841 SDValue Res = DAG.getNode(
8842 RISCVISD::MOPR, DL, MVT::i64, NewOp,
8843 DAG.getTargetConstant(Op.getConstantOperandVal(2), DL, MVT::i64));
8844 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8845 }
8846 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
8847 Op.getOperand(2));
8848 }
8849
8850 case Intrinsic::riscv_moprr: {
8851 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8852 SDValue NewOp0 =
8853 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8854 SDValue NewOp1 =
8855 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8856 SDValue Res = DAG.getNode(
8857 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
8858 DAG.getTargetConstant(Op.getConstantOperandVal(3), DL, MVT::i64));
8859 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8860 }
8861 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
8862 Op.getOperand(2), Op.getOperand(3));
8863 }
8864 case Intrinsic::riscv_clmul:
8865 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8866 SDValue NewOp0 =
8867 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8868 SDValue NewOp1 =
8869 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8870 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
8871 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8872 }
8873 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
8874 Op.getOperand(2));
8875 case Intrinsic::riscv_clmulh:
8876 case Intrinsic::riscv_clmulr: {
8877 unsigned Opc =
8878 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
8879 if (RV64LegalI32 && Subtarget.is64Bit() && Op.getValueType() == MVT::i32) {
8880 SDValue NewOp0 =
8881 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(1));
8882 SDValue NewOp1 =
8883 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2));
8884 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
8885 DAG.getConstant(32, DL, MVT::i64));
8886 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
8887 DAG.getConstant(32, DL, MVT::i64));
8888 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
8889 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
8890 DAG.getConstant(32, DL, MVT::i64));
8891 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res);
8892 }
8893
8894 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
8895 }
8896 case Intrinsic::experimental_get_vector_length:
8897 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
8898 case Intrinsic::riscv_vmv_x_s: {
8899 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
8900 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
8901 }
8902 case Intrinsic::riscv_vfmv_f_s:
8903 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
8904 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
8905 case Intrinsic::riscv_vmv_v_x:
8906 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
8907 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
8908 Subtarget);
8909 case Intrinsic::riscv_vfmv_v_f:
8910 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
8911 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
8912 case Intrinsic::riscv_vmv_s_x: {
8913 SDValue Scalar = Op.getOperand(2);
8914
8915 if (Scalar.getValueType().bitsLE(XLenVT)) {
8916 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
8917 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
8918 Op.getOperand(1), Scalar, Op.getOperand(3));
8919 }
8920
8921 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
8922
8923 // This is an i64 value that lives in two scalar registers. We have to
8924 // insert this in a convoluted way. First we build a vXi64 splat containing
8925 // the two values that we assemble using some bit math. Next we'll use
8926 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
8927 // to merge element 0 from our splat into the source vector.
8928 // FIXME: This is probably not the best way to do this, but it is
8929 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
8930 // point.
8931 // sw lo, (a0)
8932 // sw hi, 4(a0)
8933 // vlse vX, (a0)
8934 //
8935 // vid.v vVid
8936 // vmseq.vx mMask, vVid, 0
8937 // vmerge.vvm vDest, vSrc, vVal, mMask
8938 MVT VT = Op.getSimpleValueType();
8939 SDValue Vec = Op.getOperand(1);
8940 SDValue VL = getVLOperand(Op);
8941
8942 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
8943 if (Op.getOperand(1).isUndef())
8944 return SplattedVal;
8945 SDValue SplattedIdx =
8946 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
8947 DAG.getConstant(0, DL, MVT::i32), VL);
8948
8949 MVT MaskVT = getMaskTypeFor(VT);
8950 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
8951 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
8952 SDValue SelectCond =
8953 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
8954 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
8955 DAG.getUNDEF(MaskVT), Mask, VL});
8956 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
8957 Vec, DAG.getUNDEF(VT), VL);
8958 }
8959 case Intrinsic::riscv_vfmv_s_f:
8960 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
8961 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
8962 // EGS * EEW >= 128 bits
8963 case Intrinsic::riscv_vaesdf_vv:
8964 case Intrinsic::riscv_vaesdf_vs:
8965 case Intrinsic::riscv_vaesdm_vv:
8966 case Intrinsic::riscv_vaesdm_vs:
8967 case Intrinsic::riscv_vaesef_vv:
8968 case Intrinsic::riscv_vaesef_vs:
8969 case Intrinsic::riscv_vaesem_vv:
8970 case Intrinsic::riscv_vaesem_vs:
8971 case Intrinsic::riscv_vaeskf1:
8972 case Intrinsic::riscv_vaeskf2:
8973 case Intrinsic::riscv_vaesz_vs:
8974 case Intrinsic::riscv_vsm4k:
8975 case Intrinsic::riscv_vsm4r_vv:
8976 case Intrinsic::riscv_vsm4r_vs: {
8977 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
8978 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
8979 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
8980 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
8981 return Op;
8982 }
8983 // EGS * EEW >= 256 bits
8984 case Intrinsic::riscv_vsm3c:
8985 case Intrinsic::riscv_vsm3me: {
8986 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
8987 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
8988 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
8989 return Op;
8990 }
8991 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
8992 case Intrinsic::riscv_vsha2ch:
8993 case Intrinsic::riscv_vsha2cl:
8994 case Intrinsic::riscv_vsha2ms: {
8995 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
8996 !Subtarget.hasStdExtZvknhb())
8997 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
8998 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
8999 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9000 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9001 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9002 return Op;
9003 }
9004 case Intrinsic::riscv_sf_vc_v_x:
9005 case Intrinsic::riscv_sf_vc_v_i:
9006 case Intrinsic::riscv_sf_vc_v_xv:
9007 case Intrinsic::riscv_sf_vc_v_iv:
9008 case Intrinsic::riscv_sf_vc_v_vv:
9009 case Intrinsic::riscv_sf_vc_v_fv:
9010 case Intrinsic::riscv_sf_vc_v_xvv:
9011 case Intrinsic::riscv_sf_vc_v_ivv:
9012 case Intrinsic::riscv_sf_vc_v_vvv:
9013 case Intrinsic::riscv_sf_vc_v_fvv:
9014 case Intrinsic::riscv_sf_vc_v_xvw:
9015 case Intrinsic::riscv_sf_vc_v_ivw:
9016 case Intrinsic::riscv_sf_vc_v_vvw:
9017 case Intrinsic::riscv_sf_vc_v_fvw: {
9018 MVT VT = Op.getSimpleValueType();
9019
9020 SmallVector<SDValue> Operands{Op->op_values()};
9021 processVCIXOperands(Op, Operands, DAG);
9022
9023 MVT RetVT = VT;
9024 if (VT.isFixedLengthVector())
9025 RetVT = getContainerForFixedLengthVector(VT);
9026 else if (VT.isFloatingPoint())
9027 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9028 VT.getVectorElementCount());
9029
9030 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9031
9032 if (VT.isFixedLengthVector())
9033 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9034 else if (VT.isFloatingPoint())
9035 NewNode = DAG.getBitcast(VT, NewNode);
9036
9037 if (Op == NewNode)
9038 break;
9039
9040 return NewNode;
9041 }
9042 }
9043
9044 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9045}
9046
9047static inline SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9048 unsigned Type) {
9049 SDLoc DL(Op);
9050 SmallVector<SDValue> Operands{Op->op_values()};
9051 Operands.erase(Operands.begin() + 1);
9052
9053 const RISCVSubtarget &Subtarget =
9054 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9055 MVT VT = Op.getSimpleValueType();
9056 MVT RetVT = VT;
9057 MVT FloatVT = VT;
9058
9059 if (VT.isFloatingPoint()) {
9060 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9061 VT.getVectorElementCount());
9062 FloatVT = RetVT;
9063 }
9064 if (VT.isFixedLengthVector())
9065 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9066 Subtarget);
9067
9068 processVCIXOperands(Op, Operands, DAG);
9069
9070 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9071 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9072 SDValue Chain = NewNode.getValue(1);
9073
9074 if (VT.isFixedLengthVector())
9075 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9076 if (VT.isFloatingPoint())
9077 NewNode = DAG.getBitcast(VT, NewNode);
9078
9079 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9080
9081 return NewNode;
9082}
9083
9084static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9085 unsigned Type) {
9086 SmallVector<SDValue> Operands{Op->op_values()};
9087 Operands.erase(Operands.begin() + 1);
9088 processVCIXOperands(Op, Operands, DAG);
9089
9090 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9091}
9092
9093SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9094 SelectionDAG &DAG) const {
9095 unsigned IntNo = Op.getConstantOperandVal(1);
9096 switch (IntNo) {
9097 default:
9098 break;
9099 case Intrinsic::riscv_masked_strided_load: {
9100 SDLoc DL(Op);
9101 MVT XLenVT = Subtarget.getXLenVT();
9102
9103 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9104 // the selection of the masked intrinsics doesn't do this for us.
9105 SDValue Mask = Op.getOperand(5);
9106 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9107
9108 MVT VT = Op->getSimpleValueType(0);
9109 MVT ContainerVT = VT;
9110 if (VT.isFixedLengthVector())
9111 ContainerVT = getContainerForFixedLengthVector(VT);
9112
9113 SDValue PassThru = Op.getOperand(2);
9114 if (!IsUnmasked) {
9115 MVT MaskVT = getMaskTypeFor(ContainerVT);
9116 if (VT.isFixedLengthVector()) {
9117 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9118 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
9119 }
9120 }
9121
9122 auto *Load = cast<MemIntrinsicSDNode>(Op);
9123 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9124 SDValue Ptr = Op.getOperand(3);
9125 SDValue Stride = Op.getOperand(4);
9126 SDValue Result, Chain;
9127
9128 // TODO: We restrict this to unmasked loads currently in consideration of
9129 // the complexity of handling all falses masks.
9130 MVT ScalarVT = ContainerVT.getVectorElementType();
9131 if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
9132 SDValue ScalarLoad =
9133 DAG.getExtLoad(ISD::ZEXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
9134 ScalarVT, Load->getMemOperand());
9135 Chain = ScalarLoad.getValue(1);
9136 Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
9137 Subtarget);
9138 } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
9139 SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
9140 Load->getMemOperand());
9141 Chain = ScalarLoad.getValue(1);
9142 Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
9143 } else {
9144 SDValue IntID = DAG.getTargetConstant(
9145 IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
9146 XLenVT);
9147
9148 SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
9149 if (IsUnmasked)
9150 Ops.push_back(DAG.getUNDEF(ContainerVT));
9151 else
9152 Ops.push_back(PassThru);
9153 Ops.push_back(Ptr);
9154 Ops.push_back(Stride);
9155 if (!IsUnmasked)
9156 Ops.push_back(Mask);
9157 Ops.push_back(VL);
9158 if (!IsUnmasked) {
9159 SDValue Policy =
9160 DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9161 Ops.push_back(Policy);
9162 }
9163
9164 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
9165 Result =
9166 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9167 Load->getMemoryVT(), Load->getMemOperand());
9168 Chain = Result.getValue(1);
9169 }
9170 if (VT.isFixedLengthVector())
9171 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
9172 return DAG.getMergeValues({Result, Chain}, DL);
9173 }
9174 case Intrinsic::riscv_seg2_load:
9175 case Intrinsic::riscv_seg3_load:
9176 case Intrinsic::riscv_seg4_load:
9177 case Intrinsic::riscv_seg5_load:
9178 case Intrinsic::riscv_seg6_load:
9179 case Intrinsic::riscv_seg7_load:
9180 case Intrinsic::riscv_seg8_load: {
9181 SDLoc DL(Op);
9182 static const Intrinsic::ID VlsegInts[7] = {
9183 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9184 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9185 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9186 Intrinsic::riscv_vlseg8};
9187 unsigned NF = Op->getNumValues() - 1;
9188 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9189 MVT XLenVT = Subtarget.getXLenVT();
9190 MVT VT = Op->getSimpleValueType(0);
9191 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9192
9193 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9194 Subtarget);
9195 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9196 auto *Load = cast<MemIntrinsicSDNode>(Op);
9197 SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
9198 ContainerVTs.push_back(MVT::Other);
9199 SDVTList VTs = DAG.getVTList(ContainerVTs);
9200 SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
9201 Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
9202 Ops.push_back(Op.getOperand(2));
9203 Ops.push_back(VL);
9204 SDValue Result =
9205 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9206 Load->getMemoryVT(), Load->getMemOperand());
9207 SmallVector<SDValue, 9> Results;
9208 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
9209 Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
9210 DAG, Subtarget));
9211 Results.push_back(Result.getValue(NF));
9212 return DAG.getMergeValues(Results, DL);
9213 }
9214 case Intrinsic::riscv_sf_vc_v_x_se:
9215 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
9216 case Intrinsic::riscv_sf_vc_v_i_se:
9217 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
9218 case Intrinsic::riscv_sf_vc_v_xv_se:
9219 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
9220 case Intrinsic::riscv_sf_vc_v_iv_se:
9221 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
9222 case Intrinsic::riscv_sf_vc_v_vv_se:
9223 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
9224 case Intrinsic::riscv_sf_vc_v_fv_se:
9225 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
9226 case Intrinsic::riscv_sf_vc_v_xvv_se:
9227 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
9228 case Intrinsic::riscv_sf_vc_v_ivv_se:
9229 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
9230 case Intrinsic::riscv_sf_vc_v_vvv_se:
9231 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
9232 case Intrinsic::riscv_sf_vc_v_fvv_se:
9233 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
9234 case Intrinsic::riscv_sf_vc_v_xvw_se:
9235 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
9236 case Intrinsic::riscv_sf_vc_v_ivw_se:
9237 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
9238 case Intrinsic::riscv_sf_vc_v_vvw_se:
9239 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
9240 case Intrinsic::riscv_sf_vc_v_fvw_se:
9241 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
9242 }
9243
9244 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9245}
9246
9247SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
9248 SelectionDAG &DAG) const {
9249 unsigned IntNo = Op.getConstantOperandVal(1);
9250 switch (IntNo) {
9251 default:
9252 break;
9253 case Intrinsic::riscv_masked_strided_store: {
9254 SDLoc DL(Op);
9255 MVT XLenVT = Subtarget.getXLenVT();
9256
9257 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
9258 // the selection of the masked intrinsics doesn't do this for us.
9259 SDValue Mask = Op.getOperand(5);
9260 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
9261
9262 SDValue Val = Op.getOperand(2);
9263 MVT VT = Val.getSimpleValueType();
9264 MVT ContainerVT = VT;
9265 if (VT.isFixedLengthVector()) {
9266 ContainerVT = getContainerForFixedLengthVector(VT);
9267 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
9268 }
9269 if (!IsUnmasked) {
9270 MVT MaskVT = getMaskTypeFor(ContainerVT);
9271 if (VT.isFixedLengthVector())
9272 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9273 }
9274
9275 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
9276
9277 SDValue IntID = DAG.getTargetConstant(
9278 IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
9279 XLenVT);
9280
9281 auto *Store = cast<MemIntrinsicSDNode>(Op);
9282 SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
9283 Ops.push_back(Val);
9284 Ops.push_back(Op.getOperand(3)); // Ptr
9285 Ops.push_back(Op.getOperand(4)); // Stride
9286 if (!IsUnmasked)
9287 Ops.push_back(Mask);
9288 Ops.push_back(VL);
9289
9290 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
9291 Ops, Store->getMemoryVT(),
9292 Store->getMemOperand());
9293 }
9294 case Intrinsic::riscv_seg2_store:
9295 case Intrinsic::riscv_seg3_store:
9296 case Intrinsic::riscv_seg4_store:
9297 case Intrinsic::riscv_seg5_store:
9298 case Intrinsic::riscv_seg6_store:
9299 case Intrinsic::riscv_seg7_store:
9300 case Intrinsic::riscv_seg8_store: {
9301 SDLoc DL(Op);
9302 static const Intrinsic::ID VssegInts[] = {
9303 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
9304 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
9305 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
9306 Intrinsic::riscv_vsseg8};
9307 // Operands are (chain, int_id, vec*, ptr, vl)
9308 unsigned NF = Op->getNumOperands() - 4;
9309 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9310 MVT XLenVT = Subtarget.getXLenVT();
9311 MVT VT = Op->getOperand(2).getSimpleValueType();
9312 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9313
9314 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
9315 Subtarget);
9316 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
9317 SDValue Ptr = Op->getOperand(NF + 2);
9318
9319 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
9320 SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
9321 for (unsigned i = 0; i < NF; i++)
9322 Ops.push_back(convertToScalableVector(
9323 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
9324 Ops.append({Ptr, VL});
9325
9326 return DAG.getMemIntrinsicNode(
9327 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
9328 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
9329 }
9330 case Intrinsic::riscv_sf_vc_xv_se:
9331 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
9332 case Intrinsic::riscv_sf_vc_iv_se:
9333 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
9334 case Intrinsic::riscv_sf_vc_vv_se:
9335 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
9336 case Intrinsic::riscv_sf_vc_fv_se:
9337 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
9338 case Intrinsic::riscv_sf_vc_xvv_se:
9339 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
9340 case Intrinsic::riscv_sf_vc_ivv_se:
9341 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
9342 case Intrinsic::riscv_sf_vc_vvv_se:
9343 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
9344 case Intrinsic::riscv_sf_vc_fvv_se:
9345 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
9346 case Intrinsic::riscv_sf_vc_xvw_se:
9347 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
9348 case Intrinsic::riscv_sf_vc_ivw_se:
9349 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
9350 case Intrinsic::riscv_sf_vc_vvw_se:
9351 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
9352 case Intrinsic::riscv_sf_vc_fvw_se:
9353 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
9354 }
9355
9356 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9357}
9358
9359static unsigned getRVVReductionOp(unsigned ISDOpcode) {
9360 switch (ISDOpcode) {
9361 default:
9362 llvm_unreachable("Unhandled reduction");
9363 case ISD::VP_REDUCE_ADD:
9364 case ISD::VECREDUCE_ADD:
9365 return RISCVISD::VECREDUCE_ADD_VL;
9366 case ISD::VP_REDUCE_UMAX:
9367 case ISD::VECREDUCE_UMAX:
9368 return RISCVISD::VECREDUCE_UMAX_VL;
9369 case ISD::VP_REDUCE_SMAX:
9370 case ISD::VECREDUCE_SMAX:
9371 return RISCVISD::VECREDUCE_SMAX_VL;
9372 case ISD::VP_REDUCE_UMIN:
9373 case ISD::VECREDUCE_UMIN:
9374 return RISCVISD::VECREDUCE_UMIN_VL;
9375 case ISD::VP_REDUCE_SMIN:
9376 case ISD::VECREDUCE_SMIN:
9377 return RISCVISD::VECREDUCE_SMIN_VL;
9378 case ISD::VP_REDUCE_AND:
9379 case ISD::VECREDUCE_AND:
9380 return RISCVISD::VECREDUCE_AND_VL;
9381 case ISD::VP_REDUCE_OR:
9382 case ISD::VECREDUCE_OR:
9383 return RISCVISD::VECREDUCE_OR_VL;
9384 case ISD::VP_REDUCE_XOR:
9385 case ISD::VECREDUCE_XOR:
9386 return RISCVISD::VECREDUCE_XOR_VL;
9387 case ISD::VP_REDUCE_FADD:
9388 return RISCVISD::VECREDUCE_FADD_VL;
9389 case ISD::VP_REDUCE_SEQ_FADD:
9390 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
9391 case ISD::VP_REDUCE_FMAX:
9392 return RISCVISD::VECREDUCE_FMAX_VL;
9393 case ISD::VP_REDUCE_FMIN:
9394 return RISCVISD::VECREDUCE_FMIN_VL;
9395 }
9396
9397}
9398
9399SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
9400 SelectionDAG &DAG,
9401 bool IsVP) const {
9402 SDLoc DL(Op);
9403 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
9404 MVT VecVT = Vec.getSimpleValueType();
9405 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
9406 Op.getOpcode() == ISD::VECREDUCE_OR ||
9407 Op.getOpcode() == ISD::VECREDUCE_XOR ||
9408 Op.getOpcode() == ISD::VP_REDUCE_AND ||
9409 Op.getOpcode() == ISD::VP_REDUCE_OR ||
9410 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
9411 "Unexpected reduction lowering");
9412
9413 MVT XLenVT = Subtarget.getXLenVT();
9414
9415 MVT ContainerVT = VecVT;
9416 if (VecVT.isFixedLengthVector()) {
9417 ContainerVT = getContainerForFixedLengthVector(VecVT);
9418 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9419 }
9420
9421 SDValue Mask, VL;
9422 if (IsVP) {
9423 Mask = Op.getOperand(2);
9424 VL = Op.getOperand(3);
9425 } else {
9426 std::tie(Mask, VL) =
9427 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9428 }
9429
9430 unsigned BaseOpc;
9431 ISD::CondCode CC;
9432 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9433
9434 switch (Op.getOpcode()) {
9435 default:
9436 llvm_unreachable("Unhandled reduction");
9437 case ISD::VECREDUCE_AND:
9438 case ISD::VP_REDUCE_AND: {
9439 // vcpop ~x == 0
9440 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
9441 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
9442 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9443 CC = ISD::SETEQ;
9444 BaseOpc = ISD::AND;
9445 break;
9446 }
9447 case ISD::VECREDUCE_OR:
9448 case ISD::VP_REDUCE_OR:
9449 // vcpop x != 0
9450 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9451 CC = ISD::SETNE;
9452 BaseOpc = ISD::OR;
9453 break;
9454 case ISD::VECREDUCE_XOR:
9455 case ISD::VP_REDUCE_XOR: {
9456 // ((vcpop x) & 1) != 0
9457 SDValue One = DAG.getConstant(1, DL, XLenVT);
9458 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
9459 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
9460 CC = ISD::SETNE;
9461 BaseOpc = ISD::XOR;
9462 break;
9463 }
9464 }
9465
9466 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
9467 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
9468
9469 if (!IsVP)
9470 return SetCC;
9471
9472 // Now include the start value in the operation.
9473 // Note that we must return the start value when no elements are operated
9474 // upon. The vcpop instructions we've emitted in each case above will return
9475 // 0 for an inactive vector, and so we've already received the neutral value:
9476 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
9477 // can simply include the start value.
9478 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
9479}
9480
9481static bool isNonZeroAVL(SDValue AVL) {
9482 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
9483 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
9484 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
9485 (ImmAVL && ImmAVL->getZExtValue() >= 1);
9486}
9487
9488/// Helper to lower a reduction sequence of the form:
9489/// scalar = reduce_op vec, scalar_start
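/// For instance (shape only, not a fixed sequence), a vector.reduce.add over
/// nxv4i32 with the start value in a scalar register is lowered roughly as:
/// move the start value into element 0 of an LMUL1 vector, vredsum.vs that
/// accumulator against the LMUL2 source under the given mask/VL, then read
/// the result back out with vmv.x.s.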
9490static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
9491 SDValue StartValue, SDValue Vec, SDValue Mask,
9492 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
9493 const RISCVSubtarget &Subtarget) {
9494 const MVT VecVT = Vec.getSimpleValueType();
9495 const MVT M1VT = getLMUL1VT(VecVT);
9496 const MVT XLenVT = Subtarget.getXLenVT();
9497 const bool NonZeroAVL = isNonZeroAVL(VL);
9498
9499 // The reduction needs an LMUL1 input; do the splat at either LMUL1
9500 // or the original VT if fractional.
9501 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
9502 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
9503 // prove it is non-zero. For the AVL=0 case, we need the scalar to
9504 // be the result of the reduction operation.
9505 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
9506 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
9507 DAG, Subtarget);
9508 if (M1VT != InnerVT)
9509 InitialValue =
9510 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
9511 InitialValue, DAG.getVectorIdxConstant(0, DL));
9512 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
9513 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
9514 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
9515 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
9516 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
9517 DAG.getVectorIdxConstant(0, DL));
9518}
9519
9520SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
9521 SelectionDAG &DAG) const {
9522 SDLoc DL(Op);
9523 SDValue Vec = Op.getOperand(0);
9524 EVT VecEVT = Vec.getValueType();
9525
9526 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
9527
9528 // Due to ordering in legalize types we may have a vector type that needs to
9529 // be split. Do that manually so we can get down to a legal type.
9530 while (getTypeAction(*DAG.getContext(), VecEVT) ==
9531 TargetLowering::TypeSplitVector) {
9532 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
9533 VecEVT = Lo.getValueType();
9534 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
9535 }
9536
9537 // TODO: The type may need to be widened rather than split. Or widened before
9538 // it can be split.
9539 if (!isTypeLegal(VecEVT))
9540 return SDValue();
9541
9542 MVT VecVT = VecEVT.getSimpleVT();
9543 MVT VecEltVT = VecVT.getVectorElementType();
9544 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9545
9546 MVT ContainerVT = VecVT;
9547 if (VecVT.isFixedLengthVector()) {
9548 ContainerVT = getContainerForFixedLengthVector(VecVT);
9549 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9550 }
9551
9552 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9553
9554 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
9555 switch (BaseOpc) {
9556 case ISD::AND:
9557 case ISD::OR:
9558 case ISD::UMAX:
9559 case ISD::UMIN:
9560 case ISD::SMAX:
9561 case ISD::SMIN:
9562 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
9563 DAG.getVectorIdxConstant(0, DL));
9564 }
9565 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
9566 Mask, VL, DL, DAG, Subtarget);
9567}
9568
9569// Given a reduction op, this function returns the matching reduction opcode,
9570// the vector SDValue and the scalar SDValue required to lower this to a
9571// RISCVISD node.
9572static std::tuple<unsigned, SDValue, SDValue>
9573getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
9574 const RISCVSubtarget &Subtarget) {
9575 SDLoc DL(Op);
9576 auto Flags = Op->getFlags();
9577 unsigned Opcode = Op.getOpcode();
9578 switch (Opcode) {
9579 default:
9580 llvm_unreachable("Unhandled reduction");
9581 case ISD::VECREDUCE_FADD: {
9582 // Use positive zero if we can. It is cheaper to materialize.
9583 SDValue Zero =
9584 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
9585 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
9586 }
9587 case ISD::VECREDUCE_SEQ_FADD:
9588 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
9589 Op.getOperand(0));
9590 case ISD::VECREDUCE_FMINIMUM:
9591 case ISD::VECREDUCE_FMAXIMUM:
9592 case ISD::VECREDUCE_FMIN:
9593 case ISD::VECREDUCE_FMAX: {
9594 SDValue Front =
9595 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
9596 DAG.getVectorIdxConstant(0, DL));
9597 unsigned RVVOpc =
9598 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
9599 ? RISCVISD::VECREDUCE_FMIN_VL
9600 : RISCVISD::VECREDUCE_FMAX_VL;
9601 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
9602 }
9603 }
9604}
9605
9606SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
9607 SelectionDAG &DAG) const {
9608 SDLoc DL(Op);
9609 MVT VecEltVT = Op.getSimpleValueType();
9610
9611 unsigned RVVOpcode;
9612 SDValue VectorVal, ScalarVal;
9613 std::tie(RVVOpcode, VectorVal, ScalarVal) =
9614 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
9615 MVT VecVT = VectorVal.getSimpleValueType();
9616
9617 MVT ContainerVT = VecVT;
9618 if (VecVT.isFixedLengthVector()) {
9619 ContainerVT = getContainerForFixedLengthVector(VecVT);
9620 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
9621 }
9622
9623 MVT ResVT = Op.getSimpleValueType();
9624 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9625 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
9626 VL, DL, DAG, Subtarget);
9627 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
9628 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
9629 return Res;
9630
9631 if (Op->getFlags().hasNoNaNs())
9632 return Res;
9633
9634 // Force output to NaN if any element is Nan.
9635 SDValue IsNan =
9636 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
9637 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
9638 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
9639 MVT XLenVT = Subtarget.getXLenVT();
9640 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
9641 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
9642 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9643 return DAG.getSelect(
9644 DL, ResVT, NoNaNs, Res,
9645 DAG.getConstantFP(APFloat::getNaN(DAG.EVTToAPFloatSemantics(ResVT)), DL,
9646 ResVT));
9647}
9648
9649SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
9650 SelectionDAG &DAG) const {
9651 SDLoc DL(Op);
9652 SDValue Vec = Op.getOperand(1);
9653 EVT VecEVT = Vec.getValueType();
9654
9655 // TODO: The type may need to be widened rather than split. Or widened before
9656 // it can be split.
9657 if (!isTypeLegal(VecEVT))
9658 return SDValue();
9659
9660 MVT VecVT = VecEVT.getSimpleVT();
9661 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
9662
9663 if (VecVT.isFixedLengthVector()) {
9664 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
9665 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9666 }
9667
9668 SDValue VL = Op.getOperand(3);
9669 SDValue Mask = Op.getOperand(2);
9670 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
9671 Vec, Mask, VL, DL, DAG, Subtarget);
9672}
9673
9674SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
9675 SelectionDAG &DAG) const {
9676 SDValue Vec = Op.getOperand(0);
9677 SDValue SubVec = Op.getOperand(1);
9678 MVT VecVT = Vec.getSimpleValueType();
9679 MVT SubVecVT = SubVec.getSimpleValueType();
9680
9681 SDLoc DL(Op);
9682 MVT XLenVT = Subtarget.getXLenVT();
9683 unsigned OrigIdx = Op.getConstantOperandVal(2);
9684 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9685
9686 // We don't have the ability to slide mask vectors up indexed by their i1
9687 // elements; the smallest we can do is i8. Often we are able to bitcast to
9688 // equivalent i8 vectors. Note that when inserting a fixed-length vector
9689 // into a scalable one, we might not necessarily have enough scalable
9690 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
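// For example (illustrative types): inserting nxv8i1 at index 16 of nxv32i1
// can be re-expressed as inserting nxv1i8 at index 2 of nxv4i8, since the
// index and both element counts are multiples of 8; a case like inserting
// v4i1 into nxv1i1 cannot be divided this way and takes the extend/truncate
// path below.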
9691 if (SubVecVT.getVectorElementType() == MVT::i1 &&
9692 (OrigIdx != 0 || !Vec.isUndef())) {
9693 if (VecVT.getVectorMinNumElements() >= 8 &&
9694 SubVecVT.getVectorMinNumElements() >= 8) {
9695 assert(OrigIdx % 8 == 0 && "Invalid index");
9696 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9697 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9698 "Unexpected mask vector lowering");
9699 OrigIdx /= 8;
9700 SubVecVT =
9701 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9702 SubVecVT.isScalableVector());
9703 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9704 VecVT.isScalableVector());
9705 Vec = DAG.getBitcast(VecVT, Vec);
9706 SubVec = DAG.getBitcast(SubVecVT, SubVec);
9707 } else {
9708 // We can't slide this mask vector up indexed by its i1 elements.
9709 // This poses a problem when we wish to insert a scalable vector which
9710 // can't be re-expressed as a larger type. Just choose the slow path and
9711 // extend to a larger type, then truncate back down.
9712 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9713 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9714 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9715 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
9716 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
9717 Op.getOperand(2));
9718 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
9719 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
9720 }
9721 }
9722
9723 // If the subvector is a fixed-length type, we cannot use subregister
9724 // manipulation to simplify the codegen; we don't know which register of a
9725 // LMUL group contains the specific subvector as we only know the minimum
9726 // register size. Therefore we must slide the vector group up the full
9727 // amount.
9728 if (SubVecVT.isFixedLengthVector()) {
9729 if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
9730 return Op;
9731 MVT ContainerVT = VecVT;
9732 if (VecVT.isFixedLengthVector()) {
9733 ContainerVT = getContainerForFixedLengthVector(VecVT);
9734 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9735 }
9736
9737 if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
9738 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9739 DAG.getUNDEF(ContainerVT), SubVec,
9740 DAG.getVectorIdxConstant(0, DL));
9741 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9742 return DAG.getBitcast(Op.getValueType(), SubVec);
9743 }
9744
9745 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
9746 DAG.getUNDEF(ContainerVT), SubVec,
9747 DAG.getVectorIdxConstant(0, DL));
9748 SDValue Mask =
9749 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9750 // Set the vector length to only the number of elements we care about. Note
9751 // that for slideup this includes the offset.
9752 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
9753 SDValue VL = getVLOp(EndIndex, ContainerVT, DL, DAG, Subtarget);
9754
9755 // Use tail agnostic policy if we're inserting over Vec's tail.
9756 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9757 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
9758 Policy = RISCVII::TAIL_AGNOSTIC;
9759
9760 // If we're inserting into the lowest elements, use a tail undisturbed
9761 // vmv.v.v.
9762 if (OrigIdx == 0) {
9763 SubVec =
9764 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
9765 } else {
9766 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9767 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
9768 SlideupAmt, Mask, VL, Policy);
9769 }
9770
9771 if (VecVT.isFixedLengthVector())
9772 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
9773 return DAG.getBitcast(Op.getValueType(), SubVec);
9774 }
9775
9776 unsigned SubRegIdx, RemIdx;
9777 std::tie(SubRegIdx, RemIdx) =
9778 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9779 VecVT, SubVecVT, OrigIdx, TRI);
9780
9781 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
9782 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
9783 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
9784 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
9785
9786 // 1. If the Idx has been completely eliminated and this subvector's size is
9787 // a vector register or a multiple thereof, or the surrounding elements are
9788 // undef, then this is a subvector insert which naturally aligns to a vector
9789 // register. These can easily be handled using subregister manipulation.
9790 // 2. If the subvector is smaller than a vector register, then the insertion
9791 // must preserve the undisturbed elements of the register. We do this by
9792 // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
9793 // (which resolves to a subregister copy), performing a VSLIDEUP to place the
9794 // subvector within the vector register, and an INSERT_SUBVECTOR of that
9795 // LMUL=1 type back into the larger vector (resolving to another subregister
9796 // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
9797 // to avoid allocating a large register group to hold our subvector.
9798 if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
9799 return Op;
9800
9801 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
9802 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
9803 // (in our case undisturbed). This means we can set up a subvector insertion
9804 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
9805 // size of the subvector.
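// For example, inserting a 4-element subvector at offset 2 uses OFFSET=2 and
// VL=6: elements 0..1 stay undisturbed, elements 2..5 receive the subvector,
// and everything past VL follows the (undisturbed) tail policy.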
9806 MVT InterSubVT = VecVT;
9807 SDValue AlignedExtract = Vec;
9808 unsigned AlignedIdx = OrigIdx - RemIdx;
9809 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
9810 InterSubVT = getLMUL1VT(VecVT);
9811 // Extract a subvector equal to the nearest full vector register type. This
9812 // should resolve to a EXTRACT_SUBREG instruction.
9813 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
9814 DAG.getVectorIdxConstant(AlignedIdx, DL));
9815 }
9816
9817 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
9818 DAG.getUNDEF(InterSubVT), SubVec,
9819 DAG.getVectorIdxConstant(0, DL));
9820
9821 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
9822
9823 ElementCount EndIndex =
9824 ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
9825 VL = computeVLMax(SubVecVT, DL, DAG);
9826
9827 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
9828 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9829 if (EndIndex == InterSubVT.getVectorElementCount())
9830 Policy = RISCVII::TAIL_AGNOSTIC;
9831
9832 // If we're inserting into the lowest elements, use a tail undisturbed
9833 // vmv.v.v.
9834 if (RemIdx == 0) {
9835 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
9836 SubVec, VL);
9837 } else {
9838 SDValue SlideupAmt =
9839 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
9840
9841 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
9842 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
9843
9844 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
9845 SlideupAmt, Mask, VL, Policy);
9846 }
9847
9848 // If required, insert this subvector back into the correct vector register.
9849 // This should resolve to an INSERT_SUBREG instruction.
9850 if (VecVT.bitsGT(InterSubVT))
9851 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
9852 DAG.getVectorIdxConstant(AlignedIdx, DL));
9853
9854 // We might have bitcast from a mask type: cast back to the original type if
9855 // required.
9856 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
9857}
9858
9859SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
9860 SelectionDAG &DAG) const {
9861 SDValue Vec = Op.getOperand(0);
9862 MVT SubVecVT = Op.getSimpleValueType();
9863 MVT VecVT = Vec.getSimpleValueType();
9864
9865 SDLoc DL(Op);
9866 MVT XLenVT = Subtarget.getXLenVT();
9867 unsigned OrigIdx = Op.getConstantOperandVal(1);
9868 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
9869
9870 // We don't have the ability to slide mask vectors down indexed by their i1
9871 // elements; the smallest we can do is i8. Often we are able to bitcast to
9872 // equivalent i8 vectors. Note that when extracting a fixed-length vector
9873 // from a scalable one, we might not necessarily have enough scalable
9874 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
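// For illustration: extracting v8i1 at index 8 from nxv16i1 can instead be
// done as extracting v1i8 at index 1 from nxv2i8.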
9875 if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
9876 if (VecVT.getVectorMinNumElements() >= 8 &&
9877 SubVecVT.getVectorMinNumElements() >= 8) {
9878 assert(OrigIdx % 8 == 0 && "Invalid index");
9879 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
9880 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
9881 "Unexpected mask vector lowering");
9882 OrigIdx /= 8;
9883 SubVecVT =
9884 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
9885 SubVecVT.isScalableVector());
9886 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
9887 VecVT.isScalableVector());
9888 Vec = DAG.getBitcast(VecVT, Vec);
9889 } else {
9890 // We can't slide this mask vector down, indexed by its i1 elements.
9891 // This poses a problem when we wish to extract a scalable vector which
9892 // can't be re-expressed as a larger type. Just choose the slow path and
9893 // extend to a larger type, then truncate back down.
9894 // TODO: We could probably improve this when extracting a fixed-length vector
9895 // from another fixed-length vector, where we can extract as i8 and shift the
9896 // correct element right to reach the desired subvector.
9897 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
9898 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
9899 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
9900 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
9901 Op.getOperand(1));
9902 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
9903 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
9904 }
9905 }
9906
9907 // With an index of 0 this is a cast-like subvector, which can be performed
9908 // with subregister operations.
9909 if (OrigIdx == 0)
9910 return Op;
9911
9912 const auto VLen = Subtarget.getRealVLen();
9913
9914 // If the subvector is a fixed-length type and we don't know VLEN exactly,
9915 // we cannot use subregister manipulation to simplify the codegen; we
9916 // don't know which register of a LMUL group contains the specific subvector
9917 // as we only know the minimum register size. Therefore we must slide the
9918 // vector group down the full amount.
9919 if (SubVecVT.isFixedLengthVector() && !VLen) {
9920 MVT ContainerVT = VecVT;
9921 if (VecVT.isFixedLengthVector()) {
9922 ContainerVT = getContainerForFixedLengthVector(VecVT);
9923 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9924 }
9925
9926 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
9927 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
9928 if (auto ShrunkVT =
9929 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
9930 ContainerVT = *ShrunkVT;
9931 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9932 DAG.getVectorIdxConstant(0, DL));
9933 }
9934
9935 SDValue Mask =
9936 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
9937 // Set the vector length to only the number of elements we care about. This
9938 // avoids sliding down elements we're going to discard straight away.
9939 SDValue VL = getVLOp(SubVecVT.getVectorNumElements(), ContainerVT, DL, DAG,
9940 Subtarget);
9941 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
9942 SDValue Slidedown =
9943 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9944 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
9945 // Now we can use a cast-like subvector extract to get the result.
9946 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
9947 DAG.getVectorIdxConstant(0, DL));
9948 return DAG.getBitcast(Op.getValueType(), Slidedown);
9949 }
9950
9951 if (VecVT.isFixedLengthVector()) {
9952 VecVT = getContainerForFixedLengthVector(VecVT);
9953 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
9954 }
9955
9956 MVT ContainerSubVecVT = SubVecVT;
9957 if (SubVecVT.isFixedLengthVector())
9958 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
9959
9960 unsigned SubRegIdx;
9961 ElementCount RemIdx;
9962 // extract_subvector scales the index by vscale if the subvector is scalable,
9963 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
9964 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
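// For illustration, with VLEN=256 (vscale=4 here) an extract at OrigIdx=10
// calls the decomposition with index 10/4=2 and rebuilds the remainder as
// Decompose.second*4 + 10%4 fixed elements.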
9965 if (SubVecVT.isFixedLengthVector()) {
9966 assert(VLen);
9967 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
9968 auto Decompose =
9969 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9970 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
9971 SubRegIdx = Decompose.first;
9972 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
9973 (OrigIdx % Vscale));
9974 } else {
9975 auto Decompose =
9976 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
9977 VecVT, ContainerSubVecVT, OrigIdx, TRI);
9978 SubRegIdx = Decompose.first;
9979 RemIdx = ElementCount::getScalable(Decompose.second);
9980 }
9981
9982 // If the Idx has been completely eliminated then this is a subvector extract
9983 // which naturally aligns to a vector register. These can easily be handled
9984 // using subregister manipulation.
9985 if (RemIdx.isZero()) {
9986 if (SubVecVT.isFixedLengthVector()) {
9987 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, ContainerSubVecVT, Vec);
9988 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
9989 }
9990 return Op;
9991 }
9992
9993 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
9994 // was > M1 then the index would need to be a multiple of VLMAX, and so would
9995 // divide exactly.
9996 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
9997 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
9998
9999 // If the vector type is an LMUL-group type, extract a subvector equal to the
10000 // nearest full vector register type.
10001 MVT InterSubVT = VecVT;
10002 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10003 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10004 // we should have successfully decomposed the extract into a subregister.
10005 assert(SubRegIdx != RISCV::NoSubRegister);
10006 InterSubVT = getLMUL1VT(VecVT);
10007 Vec = DAG.getTargetExtractSubreg(SubRegIdx, DL, InterSubVT, Vec);
10008 }
10009
10010 // Slide this vector register down by the desired number of elements in order
10011 // to place the desired subvector starting at element 0.
10012 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10013 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10014 if (SubVecVT.isFixedLengthVector())
10015 VL = getVLOp(SubVecVT.getVectorNumElements(), InterSubVT, DL, DAG,
10016 Subtarget);
10017 SDValue Slidedown =
10018 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10019 Vec, SlidedownAmt, Mask, VL);
10020
10021 // Now the vector is in the right position, extract our final subvector. This
10022 // should resolve to a COPY.
10023 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10024 DAG.getVectorIdxConstant(0, DL));
10025
10026 // We might have bitcast from a mask type: cast back to the original type if
10027 // required.
10028 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10029}
10030
10031// Widen a vector's operands to i8, then truncate its results back to the
10032// original type, typically i1. All operand and result types must be the same.
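// For example, an i1 vector_deinterleave is performed as an i8 deinterleave of
// zero-extended operands, and each i8 result is converted back to i1 with a
// setcc against zero.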
10033static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10034 SelectionDAG &DAG) {
10035 MVT VT = N.getSimpleValueType();
10036 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10037 SmallVector<SDValue, 4> WideOps;
10038 for (SDValue Op : N->ops()) {
10039 assert(Op.getSimpleValueType() == VT &&
10040 "Operands and result must be same type");
10041 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10042 }
10043
10044 unsigned NumVals = N->getNumValues();
10045
10046 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10047 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10048 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10049 SmallVector<SDValue, 4> TruncVals;
10050 for (unsigned I = 0; I < NumVals; I++) {
10051 TruncVals.push_back(
10052 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10053 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10054 }
10055
10056 if (TruncVals.size() > 1)
10057 return DAG.getMergeValues(TruncVals, DL);
10058 return TruncVals.front();
10059}
10060
10061SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10062 SelectionDAG &DAG) const {
10063 SDLoc DL(Op);
10064 MVT VecVT = Op.getSimpleValueType();
10065
10066 assert(VecVT.isScalableVector() &&
10067 "vector_interleave on non-scalable vector!");
10068
10069 // 1 bit element vectors need to be widened to e8
10070 if (VecVT.getVectorElementType() == MVT::i1)
10071 return widenVectorOpsToi8(Op, DL, DAG);
10072
10073 // If the VT is LMUL=8, we need to split and reassemble.
10074 if (VecVT.getSizeInBits().getKnownMinValue() ==
10075 (8 * RISCV::RVVBitsPerBlock)) {
10076 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10077 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10078 EVT SplitVT = Op0Lo.getValueType();
10079
10080 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10081 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10082 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10083 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10084
10085 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10086 ResLo.getValue(0), ResHi.getValue(0));
10087 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10088 ResHi.getValue(1));
10089 return DAG.getMergeValues({Even, Odd}, DL);
10090 }
10091
10092 // Concatenate the two vectors as one vector to deinterleave
10093 MVT ConcatVT =
10094 MVT::getVectorVT(VecVT.getVectorElementType(),
10095 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10096 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10097 Op.getOperand(0), Op.getOperand(1));
10098
10099 // We want to operate on all lanes, so get the mask and VL for it
10100 auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
10101 SDValue Passthru = DAG.getUNDEF(ConcatVT);
10102
10103 // We can deinterleave through vnsrl.wi if the element type is smaller than
10104 // ELEN
10105 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10106 SDValue Even =
10107 getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
10108 SDValue Odd =
10109 getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
10110 return DAG.getMergeValues({Even, Odd}, DL);
10111 }
10112
10113 // For the indices, use the same SEW to avoid an extra vsetvli
10114 MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
10115 // Create a vector of even indices {0, 2, 4, ...}
10116 SDValue EvenIdx =
10117 DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
10118 // Create a vector of odd indices {1, 3, 5, ... }
10119 SDValue OddIdx =
10120 DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
10121
10122 // Gather the even and odd elements into two separate vectors
10123 SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10124 Concat, EvenIdx, Passthru, Mask, VL);
10125 SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
10126 Concat, OddIdx, Passthru, Mask, VL);
10127
10128 // Extract the result half of the gather for even and odd
10129 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10130 DAG.getVectorIdxConstant(0, DL));
10131 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10132 DAG.getVectorIdxConstant(0, DL));
10133
10134 return DAG.getMergeValues({Even, Odd}, DL);
10135}
10136
10137SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10138 SelectionDAG &DAG) const {
10139 SDLoc DL(Op);
10140 MVT VecVT = Op.getSimpleValueType();
10141
10142 assert(VecVT.isScalableVector() &&
10143 "vector_interleave on non-scalable vector!");
10144
10145 // i1 vectors need to be widened to i8
10146 if (VecVT.getVectorElementType() == MVT::i1)
10147 return widenVectorOpsToi8(Op, DL, DAG);
10148
10149 MVT XLenVT = Subtarget.getXLenVT();
10150 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10151
10152 // If the VT is LMUL=8, we need to split and reassemble.
10153 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
10154 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10155 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10156 EVT SplitVT = Op0Lo.getValueType();
10157
10158 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10159 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
10160 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
10161 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
10162
10163 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10164 ResLo.getValue(0), ResLo.getValue(1));
10165 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10166 ResHi.getValue(0), ResHi.getValue(1));
10167 return DAG.getMergeValues({Lo, Hi}, DL);
10168 }
10169
10170 SDValue Interleaved;
10171
10172 // If the element type is smaller than ELEN, then we can interleave with
10173 // vwaddu.vv and vwmaccu.vx
10174 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10175 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
10176 DAG, Subtarget);
10177 } else {
10178 // Otherwise, fall back to using vrgatherei16.vv
10179 MVT ConcatVT =
10180 MVT::getVectorVT(VecVT.getVectorElementType(),
10181 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10182 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10183 Op.getOperand(0), Op.getOperand(1));
10184
10185 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
10186
10187 // 0 1 2 3 4 5 6 7 ...
10188 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
10189
10190 // 1 1 1 1 1 1 1 1 ...
10191 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
10192
10193 // 1 0 1 0 1 0 1 0 ...
10194 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
10195 OddMask = DAG.getSetCC(
10196 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
10197 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
10198 ISD::CondCode::SETNE);
10199
10200 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
10201
10202 // Build up the index vector for interleaving the concatenated vector
10203 // 0 0 1 1 2 2 3 3 ...
10204 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
10205 // 0 n 1 n+1 2 n+2 3 n+3 ...
10206 Idx =
10207 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
10208
10209 // Then perform the interleave
10210 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
10211 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
10212 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
10213 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
10214 }
10215
10216 // Extract the two halves from the interleaved result
10217 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10218 DAG.getVectorIdxConstant(0, DL));
10219 SDValue Hi = DAG.getNode(
10220 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
10221 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
10222
10223 return DAG.getMergeValues({Lo, Hi}, DL);
10224}
10225
10226// Lower step_vector to the vid instruction. Any non-identity step value must
10227// be accounted for by manual expansion.
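// For example, a step of 4 becomes vid.v followed by a shift left by 2, while a
// non-power-of-two step uses a vector multiply instead.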
10228SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
10229 SelectionDAG &DAG) const {
10230 SDLoc DL(Op);
10231 MVT VT = Op.getSimpleValueType();
10232 assert(VT.isScalableVector() && "Expected scalable vector");
10233 MVT XLenVT = Subtarget.getXLenVT();
10234 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
10235 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
10236 uint64_t StepValImm = Op.getConstantOperandVal(0);
10237 if (StepValImm != 1) {
10238 if (isPowerOf2_64(StepValImm)) {
10239 SDValue StepVal =
10240 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
10241 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
10242 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
10243 } else {
10244 SDValue StepVal = lowerScalarSplat(
10245 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
10246 VL, VT, DL, DAG, Subtarget);
10247 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
10248 }
10249 }
10250 return StepVec;
10251}
10252
10253// Implement vector_reverse using vrgather.vv with indices determined by
10254// subtracting the id of each element from (VLMAX-1). This will convert
10255// the indices like so:
10256// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
10257// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
10258SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
10259 SelectionDAG &DAG) const {
10260 SDLoc DL(Op);
10261 MVT VecVT = Op.getSimpleValueType();
10262 if (VecVT.getVectorElementType() == MVT::i1) {
10263 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10264 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
10265 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
10266 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
10267 }
10268 unsigned EltSize = VecVT.getScalarSizeInBits();
10269 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
10270 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
10271 unsigned MaxVLMAX =
10272 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
10273
10274 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
10275 MVT IntVT = VecVT.changeVectorElementTypeToInteger();
10276
10277 // If this is SEW=8 and VLMAX is potentially more than 256, we need
10278 // to use vrgatherei16.vv.
10279 // TODO: It's also possible to use vrgatherei16.vv for other types to
10280 // decrease register width for the index calculation.
10281 if (MaxVLMAX > 256 && EltSize == 8) {
10282 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
10283 // Reverse each half, then reassemble them in reverse order.
10284 // NOTE: It's also possible that after splitting that VLMAX no longer
10285 // requires vrgatherei16.vv.
10286 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
10287 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10288 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
10289 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
10290 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
10291 // Reassemble the low and high pieces reversed.
10292 // FIXME: This is a CONCAT_VECTORS.
10293 SDValue Res =
10294 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
10295 DAG.getVectorIdxConstant(0, DL));
10296 return DAG.getNode(
10297 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
10298 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
10299 }
10300
10301 // Just promote the int type to i16 which will double the LMUL.
10302 IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
10303 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
10304 }
10305
10306 MVT XLenVT = Subtarget.getXLenVT();
10307 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
10308
10309 // Calculate VLMAX-1 for the desired SEW.
10310 SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
10311 computeVLMax(VecVT, DL, DAG),
10312 DAG.getConstant(1, DL, XLenVT));
10313
10314 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
10315 bool IsRV32E64 =
10316 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
10317 SDValue SplatVL;
10318 if (!IsRV32E64)
10319 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
10320 else
10321 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
10322 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
10323
10324 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
10325 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
10326 DAG.getUNDEF(IntVT), Mask, VL);
10327
10328 return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices,
10329 DAG.getUNDEF(VecVT), Mask, VL);
10330}
10331
10332SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
10333 SelectionDAG &DAG) const {
10334 SDLoc DL(Op);
10335 SDValue V1 = Op.getOperand(0);
10336 SDValue V2 = Op.getOperand(1);
10337 MVT XLenVT = Subtarget.getXLenVT();
10338 MVT VecVT = Op.getSimpleValueType();
10339
10340 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
10341
10342 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
10343 SDValue DownOffset, UpOffset;
10344 if (ImmValue >= 0) {
10345 // The operand is a TargetConstant, we need to rebuild it as a regular
10346 // constant.
10347 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
10348 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
10349 } else {
10350 // The operand is a TargetConstant, we need to rebuild it as a regular
10351 // constant rather than negating the original operand.
10352 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
10353 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
10354 }
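// The splice is then a slidedown of V1 by DownOffset (with VL=UpOffset),
// followed by a slideup of V2 by UpOffset over the resulting tail.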
10355
10356 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
10357
10358 SDValue SlideDown =
10359 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
10360 DownOffset, TrueMask, UpOffset);
10361 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
10362 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
10363 RISCVII::TAIL_AGNOSTIC);
10364}
10365
10366SDValue
10367RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
10368 SelectionDAG &DAG) const {
10369 SDLoc DL(Op);
10370 auto *Load = cast<LoadSDNode>(Op);
10371
10372 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10373 Load->getMemoryVT(),
10374 *Load->getMemOperand()) &&
10375 "Expecting a correctly-aligned load");
10376
10377 MVT VT = Op.getSimpleValueType();
10378 MVT XLenVT = Subtarget.getXLenVT();
10379 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10380
10381 // If we know the exact VLEN and our fixed length vector completely fills
10382 // the container, use a whole register load instead.
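// For example, if VLEN is known to be exactly 128, a v4i32 load completely
// fills its LMUL=1 container and can be emitted as an ordinary vector load.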
10383 const auto [MinVLMAX, MaxVLMAX] =
10384 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10385 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10386 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10387 MachineMemOperand *MMO = Load->getMemOperand();
10388 MachineFunction &MF = DAG.getMachineFunction();
10389 MMO = MF.getMachineMemOperand(
10390 MMO, MMO->getPointerInfo(),
10391 MMO->getMemoryType().isValid()
10392 ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits())
10393 : MMO->getMemoryType());
10394 SDValue NewLoad =
10395 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), MMO);
10396 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10397 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10398 }
10399
10400 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG, Subtarget);
10401
10402 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10403 SDValue IntID = DAG.getTargetConstant(
10404 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
10405 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
10406 if (!IsMaskOp)
10407 Ops.push_back(DAG.getUNDEF(ContainerVT));
10408 Ops.push_back(Load->getBasePtr());
10409 Ops.push_back(VL);
10410 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10411 SDValue NewLoad =
10412 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10413 Load->getMemoryVT(), Load->getMemOperand());
10414
10415 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
10416 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
10417}
10418
10419SDValue
10420RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
10421 SelectionDAG &DAG) const {
10422 SDLoc DL(Op);
10423 auto *Store = cast<StoreSDNode>(Op);
10424
10425 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
10426 Store->getMemoryVT(),
10427 *Store->getMemOperand()) &&
10428 "Expecting a correctly-aligned store");
10429
10430 SDValue StoreVal = Store->getValue();
10431 MVT VT = StoreVal.getSimpleValueType();
10432 MVT XLenVT = Subtarget.getXLenVT();
10433
10434 // If the size is less than a byte, we need to pad with zeros to make a byte.
10435 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
10436 VT = MVT::v8i1;
10437 StoreVal =
10438 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
10439 StoreVal, DAG.getVectorIdxConstant(0, DL));
10440 }
10441
10442 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10443
10444 SDValue NewValue =
10445 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
10446
10447
10448 // If we know the exact VLEN and our fixed length vector completely fills
10449 // the container, use a whole register store instead.
10450 const auto [MinVLMAX, MaxVLMAX] =
10451 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
10452 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
10453 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
10454 MachineMemOperand *MMO = Store->getMemOperand();
10455 MachineFunction &MF = DAG.getMachineFunction();
10456 MMO = MF.getMachineMemOperand(
10457 MMO, MMO->getPointerInfo(),
10458 MMO->getMemoryType().isValid()
10459 ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits())
10460 : MMO->getMemoryType());
10461 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
10462 MMO);
10463 }
10464
10465 SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
10466 Subtarget);
10467
10468 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
10469 SDValue IntID = DAG.getTargetConstant(
10470 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
10471 return DAG.getMemIntrinsicNode(
10472 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
10473 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
10474 Store->getMemoryVT(), Store->getMemOperand());
10475}
10476
10477SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
10478 SelectionDAG &DAG) const {
10479 SDLoc DL(Op);
10480 MVT VT = Op.getSimpleValueType();
10481
10482 const auto *MemSD = cast<MemSDNode>(Op);
10483 EVT MemVT = MemSD->getMemoryVT();
10484 MachineMemOperand *MMO = MemSD->getMemOperand();
10485 SDValue Chain = MemSD->getChain();
10486 SDValue BasePtr = MemSD->getBasePtr();
10487
10488 SDValue Mask, PassThru, VL;
10489 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
10490 Mask = VPLoad->getMask();
10491 PassThru = DAG.getUNDEF(VT);
10492 VL = VPLoad->getVectorLength();
10493 } else {
10494 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
10495 Mask = MLoad->getMask();
10496 PassThru = MLoad->getPassThru();
10497 }
10498
10499 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
10500
10501 MVT XLenVT = Subtarget.getXLenVT();
10502
10503 MVT ContainerVT = VT;
10504 if (VT.isFixedLengthVector()) {
10505 ContainerVT = getContainerForFixedLengthVector(VT);
10506 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
10507 if (!IsUnmasked) {
10508 MVT MaskVT = getMaskTypeFor(ContainerVT);
10509 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10510 }
10511 }
10512
10513 if (!VL)
10514 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10515
10516 unsigned IntID =
10517 IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
10518 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10519 if (IsUnmasked)
10520 Ops.push_back(DAG.getUNDEF(ContainerVT));
10521 else
10522 Ops.push_back(PassThru);
10523 Ops.push_back(BasePtr);
10524 if (!IsUnmasked)
10525 Ops.push_back(Mask);
10526 Ops.push_back(VL);
10527 if (!IsUnmasked)
10528 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
10529
10530 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
10531
10532 SDValue Result =
10533 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
10534 Chain = Result.getValue(1);
10535
10536 if (VT.isFixedLengthVector())
10537 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10538
10539 return DAG.getMergeValues({Result, Chain}, DL);
10540}
10541
10542SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
10543 SelectionDAG &DAG) const {
10544 SDLoc DL(Op);
10545
10546 const auto *MemSD = cast<MemSDNode>(Op);
10547 EVT MemVT = MemSD->getMemoryVT();
10548 MachineMemOperand *MMO = MemSD->getMemOperand();
10549 SDValue Chain = MemSD->getChain();
10550 SDValue BasePtr = MemSD->getBasePtr();
10551 SDValue Val, Mask, VL;
10552
10553 bool IsCompressingStore = false;
10554 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
10555 Val = VPStore->getValue();
10556 Mask = VPStore->getMask();
10557 VL = VPStore->getVectorLength();
10558 } else {
10559 const auto *MStore = cast<MaskedStoreSDNode>(Op);
10560 Val = MStore->getValue();
10561 Mask = MStore->getMask();
10562 IsCompressingStore = MStore->isCompressingStore();
10563 }
10564
10565 bool IsUnmasked =
10566 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
10567
10568 MVT VT = Val.getSimpleValueType();
10569 MVT XLenVT = Subtarget.getXLenVT();
10570
10571 MVT ContainerVT = VT;
10572 if (VT.isFixedLengthVector()) {
10573 ContainerVT = getContainerForFixedLengthVector(VT);
10574
10575 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
10576 if (!IsUnmasked || IsCompressingStore) {
10577 MVT MaskVT = getMaskTypeFor(ContainerVT);
10578 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10579 }
10580 }
10581
10582 if (!VL)
10583 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10584
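// For a compressing store, first pack the active elements to the front with
// vcompress, then store only vcpop(mask) elements; no store mask is needed
// after that.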
10585 if (IsCompressingStore) {
10586 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
10587 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
10588 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
10589 VL =
10590 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
10591 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
10592 }
10593
10594 unsigned IntID =
10595 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
10596 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
10597 Ops.push_back(Val);
10598 Ops.push_back(BasePtr);
10599 if (!IsUnmasked)
10600 Ops.push_back(Mask);
10601 Ops.push_back(VL);
10602
10603 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
10604 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
10605}
10606
10607SDValue
10608RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
10609 SelectionDAG &DAG) const {
10610 MVT InVT = Op.getOperand(0).getSimpleValueType();
10611 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
10612
10613 MVT VT = Op.getSimpleValueType();
10614
10615 SDValue Op1 =
10616 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
10617 SDValue Op2 =
10618 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10619
10620 SDLoc DL(Op);
10621 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
10622 DAG, Subtarget);
10623 MVT MaskVT = getMaskTypeFor(ContainerVT);
10624
10625 SDValue Cmp =
10626 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
10627 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
10628
10629 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
10630}
10631
10632SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
10633 SelectionDAG &DAG) const {
10634 unsigned Opc = Op.getOpcode();
10635 SDLoc DL(Op);
10636 SDValue Chain = Op.getOperand(0);
10637 SDValue Op1 = Op.getOperand(1);
10638 SDValue Op2 = Op.getOperand(2);
10639 SDValue CC = Op.getOperand(3);
10640 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
10641 MVT VT = Op.getSimpleValueType();
10642 MVT InVT = Op1.getSimpleValueType();
10643
10644 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
10645 // condition codes.
10646 if (Opc == ISD::STRICT_FSETCCS) {
10647 // Expand strict_fsetccs(x, y, oeq) to
10648 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
10649 SDVTList VTList = Op->getVTList();
10650 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
10651 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
10652 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10653 Op2, OLECCVal);
10654 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
10655 Op1, OLECCVal);
10656 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
10657 Tmp1.getValue(1), Tmp2.getValue(1));
10658 // Tmp1 and Tmp2 might be the same node.
10659 if (Tmp1 != Tmp2)
10660 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
10661 return DAG.getMergeValues({Tmp1, OutChain}, DL);
10662 }
10663
10664 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
10665 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
10666 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
10667 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
10668 Op2, OEQCCVal);
10669 SDValue Res = DAG.getNOT(DL, OEQ, VT);
10670 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
10671 }
10672 }
10673
10674 MVT ContainerInVT = InVT;
10675 if (InVT.isFixedLengthVector()) {
10676 ContainerInVT = getContainerForFixedLengthVector(InVT);
10677 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
10678 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
10679 }
10680 MVT MaskVT = getMaskTypeFor(ContainerInVT);
10681
10682 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
10683
10684 SDValue Res;
10685 if (Opc == ISD::STRICT_FSETCC &&
10686 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
10687 CCVal == ISD::SETOLE)) {
10688 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
10689 // is only active when both input elements are ordered.
10690 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
10691 SDValue OrderMask1 = DAG.getNode(
10692 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10693 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10694 True, VL});
10695 SDValue OrderMask2 = DAG.getNode(
10696 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
10697 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
10698 True, VL});
10699 Mask =
10700 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
10701 // Use Mask as the merge operand to let the result be 0 if either of the
10702 // inputs is unordered.
10703 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
10704 DAG.getVTList(MaskVT, MVT::Other),
10705 {Chain, Op1, Op2, CC, Mask, Mask, VL});
10706 } else {
10707 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
10708 : RISCVISD::STRICT_FSETCCS_VL;
10709 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
10710 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
10711 }
10712
10713 if (VT.isFixedLengthVector()) {
10714 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10715 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10716 }
10717 return Res;
10718}
10719
10720// Lower vector ABS to smax(X, sub(0, X)).
10721SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
10722 SDLoc DL(Op);
10723 MVT VT = Op.getSimpleValueType();
10724 SDValue X = Op.getOperand(0);
10725
10726 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
10727 "Unexpected type for ISD::ABS");
10728
10729 MVT ContainerVT = VT;
10730 if (VT.isFixedLengthVector()) {
10731 ContainerVT = getContainerForFixedLengthVector(VT);
10732 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
10733 }
10734
10735 SDValue Mask, VL;
10736 if (Op->getOpcode() == ISD::VP_ABS) {
10737 Mask = Op->getOperand(1);
10738 if (VT.isFixedLengthVector())
10739 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
10740 Subtarget);
10741 VL = Op->getOperand(2);
10742 } else
10743 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10744
10745 SDValue SplatZero = DAG.getNode(
10746 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
10747 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
10748 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
10749 DAG.getUNDEF(ContainerVT), Mask, VL);
10750 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
10751 DAG.getUNDEF(ContainerVT), Mask, VL);
10752
10753 if (VT.isFixedLengthVector())
10754 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
10755 return Max;
10756}
10757
10758SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
10759 SDValue Op, SelectionDAG &DAG) const {
10760 SDLoc DL(Op);
10761 MVT VT = Op.getSimpleValueType();
10762 SDValue Mag = Op.getOperand(0);
10763 SDValue Sign = Op.getOperand(1);
10764 assert(Mag.getValueType() == Sign.getValueType() &&
10765 "Can only handle COPYSIGN with matching types.");
10766
10767 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10768 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
10769 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
10770
10771 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10772
10773 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
10774 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
10775
10776 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
10777}
10778
10779SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
10780 SDValue Op, SelectionDAG &DAG) const {
10781 MVT VT = Op.getSimpleValueType();
10782 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10783
10784 MVT I1ContainerVT =
10785 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10786
10787 SDValue CC =
10788 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
10789 SDValue Op1 =
10790 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
10791 SDValue Op2 =
10792 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
10793
10794 SDLoc DL(Op);
10795 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
10796
10797 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
10798 Op2, DAG.getUNDEF(ContainerVT), VL);
10799
10800 return convertFromScalableVector(VT, Select, DAG, Subtarget);
10801}
10802
10803SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
10804 SelectionDAG &DAG) const {
10805 unsigned NewOpc = getRISCVVLOp(Op);
10806 bool HasMergeOp = hasMergeOp(NewOpc);
10807 bool HasMask = hasMaskOp(NewOpc);
10808
10809 MVT VT = Op.getSimpleValueType();
10810 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10811
10812 // Create list of operands by converting existing ones to scalable types.
10813 SmallVector<SDValue, 6> Ops;
10814 for (const SDValue &V : Op->op_values()) {
10815 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10816
10817 // Pass through non-vector operands.
10818 if (!V.getValueType().isVector()) {
10819 Ops.push_back(V);
10820 continue;
10821 }
10822
10823 // "cast" fixed length vector to a scalable vector.
10824 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
10825 "Only fixed length vectors are supported!");
10826 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10827 }
10828
10829 SDLoc DL(Op);
10830 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
10831 if (HasMergeOp)
10832 Ops.push_back(DAG.getUNDEF(ContainerVT));
10833 if (HasMask)
10834 Ops.push_back(Mask);
10835 Ops.push_back(VL);
10836
10837 // StrictFP operations have two result values. Their lowered results should
10838 // have the same result count.
10839 if (Op->isStrictFPOpcode()) {
10840 SDValue ScalableRes =
10841 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
10842 Op->getFlags());
10843 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10844 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
10845 }
10846
10847 SDValue ScalableRes =
10848 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
10849 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
10850}
10851
10852// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
10853// * Operands of each node are assumed to be in the same order.
10854// * The EVL operand is promoted from i32 to i64 on RV64.
10855// * Fixed-length vectors are converted to their scalable-vector container
10856// types.
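// For example, a fixed-length vp.add becomes RISCVISD::ADD_VL on the scalable
// container type, with an undef merge operand inserted before the mask where
// the VL node expects one.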
10857SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
10858 unsigned RISCVISDOpc = getRISCVVLOp(Op);
10859 bool HasMergeOp = hasMergeOp(RISCVISDOpc);
10860
10861 SDLoc DL(Op);
10862 MVT VT = Op.getSimpleValueType();
10863 SmallVector<SDValue, 4> Ops;
10864
10865 MVT ContainerVT = VT;
10866 if (VT.isFixedLengthVector())
10867 ContainerVT = getContainerForFixedLengthVector(VT);
10868
10869 for (const auto &OpIdx : enumerate(Op->ops())) {
10870 SDValue V = OpIdx.value();
10871 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
10872 // Add dummy merge value before the mask. Or if there isn't a mask, before
10873 // EVL.
10874 if (HasMergeOp) {
10875 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
10876 if (MaskIdx) {
10877 if (*MaskIdx == OpIdx.index())
10878 Ops.push_back(DAG.getUNDEF(ContainerVT));
10879 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
10880 OpIdx.index()) {
10881 if (Op.getOpcode() == ISD::VP_MERGE) {
10882 // For VP_MERGE, copy the false operand instead of an undef value.
10883 Ops.push_back(Ops.back());
10884 } else {
10885 assert(Op.getOpcode() == ISD::VP_SELECT);
10886 // For VP_SELECT, add an undef value.
10887 Ops.push_back(DAG.getUNDEF(ContainerVT));
10888 }
10889 }
10890 }
10891 // Pass through operands which aren't fixed-length vectors.
10892 if (!V.getValueType().isFixedLengthVector()) {
10893 Ops.push_back(V);
10894 continue;
10895 }
10896 // "cast" fixed length vector to a scalable vector.
10897 MVT OpVT = V.getSimpleValueType();
10898 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
10899 assert(useRVVForFixedLengthVectorVT(OpVT) &&
10900 "Only fixed length vectors are supported!");
10901 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
10902 }
10903
10904 if (!VT.isFixedLengthVector())
10905 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
10906
10907 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
10908
10909 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
10910}
10911
10912SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
10913 SelectionDAG &DAG) const {
10914 SDLoc DL(Op);
10915 MVT VT = Op.getSimpleValueType();
10916
10917 SDValue Src = Op.getOperand(0);
10918 // NOTE: Mask is dropped.
10919 SDValue VL = Op.getOperand(2);
10920
10921 MVT ContainerVT = VT;
10922 if (VT.isFixedLengthVector()) {
10923 ContainerVT = getContainerForFixedLengthVector(VT);
10924 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10925 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
10926 }
10927
10928 MVT XLenVT = Subtarget.getXLenVT();
10929 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10930 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10931 DAG.getUNDEF(ContainerVT), Zero, VL);
10932
10933 SDValue SplatValue = DAG.getConstant(
10934 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
10935 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10936 DAG.getUNDEF(ContainerVT), SplatValue, VL);
10937
10938 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
10939 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
10940 if (!VT.isFixedLengthVector())
10941 return Result;
10942 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10943}
10944
10945SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
10946 SelectionDAG &DAG) const {
10947 SDLoc DL(Op);
10948 MVT VT = Op.getSimpleValueType();
10949
10950 SDValue Op1 = Op.getOperand(0);
10951 SDValue Op2 = Op.getOperand(1);
10952 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10953 // NOTE: Mask is dropped.
10954 SDValue VL = Op.getOperand(4);
10955
10956 MVT ContainerVT = VT;
10957 if (VT.isFixedLengthVector()) {
10958 ContainerVT = getContainerForFixedLengthVector(VT);
10959 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
10960 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
10961 }
10962
10963 SDValue Result;
10964 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10965
10966 switch (Condition) {
10967 default:
10968 break;
10969 // X != Y --> (X^Y)
10970 case ISD::SETNE:
10971 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
10972 break;
10973 // X == Y --> ~(X^Y)
10974 case ISD::SETEQ: {
10975 SDValue Temp =
10976 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
10977 Result =
10978 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
10979 break;
10980 }
10981 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
10982 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
10983 case ISD::SETGT:
10984 case ISD::SETULT: {
10985 SDValue Temp =
10986 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
10987 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
10988 break;
10989 }
10990 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
10991 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
10992 case ISD::SETLT:
10993 case ISD::SETUGT: {
10994 SDValue Temp =
10995 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
10996 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
10997 break;
10998 }
10999 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11000 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11001 case ISD::SETGE:
11002 case ISD::SETULE: {
11003 SDValue Temp =
11004 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11005 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11006 break;
11007 }
11008 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11009 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11010 case ISD::SETLE:
11011 case ISD::SETUGE: {
11012 SDValue Temp =
11013 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11014 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11015 break;
11016 }
11017 }
11018
11019 if (!VT.isFixedLengthVector())
11020 return Result;
11021 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11022}
11023
11024// Lower Floating-Point/Integer Type-Convert VP SDNodes
11025SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11026 SelectionDAG &DAG) const {
11027 SDLoc DL(Op);
11028
11029 SDValue Src = Op.getOperand(0);
11030 SDValue Mask = Op.getOperand(1);
11031 SDValue VL = Op.getOperand(2);
11032 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11033
11034 MVT DstVT = Op.getSimpleValueType();
11035 MVT SrcVT = Src.getSimpleValueType();
11036 if (DstVT.isFixedLengthVector()) {
11037 DstVT = getContainerForFixedLengthVector(DstVT);
11038 SrcVT = getContainerForFixedLengthVector(SrcVT);
11039 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11040 MVT MaskVT = getMaskTypeFor(DstVT);
11041 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11042 }
11043
11044 unsigned DstEltSize = DstVT.getScalarSizeInBits();
11045 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
11046
11047 SDValue Result;
11048 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
11049 if (SrcVT.isInteger()) {
11050 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11051
11052 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
11053 ? RISCVISD::VSEXT_VL
11054 : RISCVISD::VZEXT_VL;
11055
11056 // Do we need to do any pre-widening before converting?
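// An i1 source is first expanded to 0/1 (zext) or 0/-1 (sext) integer elements
// via a vmerge of two splats before the integer-to-FP conversion.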
11057 if (SrcEltSize == 1) {
11058 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
11059 MVT XLenVT = Subtarget.getXLenVT();
11060 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11061 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11062 DAG.getUNDEF(IntVT), Zero, VL);
11063 SDValue One = DAG.getConstant(
11064 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
11065 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
11066 DAG.getUNDEF(IntVT), One, VL);
11067 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
11068 ZeroSplat, DAG.getUNDEF(IntVT), VL);
11069 } else if (DstEltSize > (2 * SrcEltSize)) {
11070 // Widen before converting.
11071 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
11072 DstVT.getVectorElementCount());
11073 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
11074 }
11075
11076 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11077 } else {
11078 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11079 "Wrong input/output vector types");
11080
11081 // Convert f16 to f32 then convert f32 to i64.
11082 if (DstEltSize > (2 * SrcEltSize)) {
11083 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11084 MVT InterimFVT =
11085 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11086 Src =
11087 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
11088 }
11089
11090 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
11091 }
11092 } else { // Narrowing + Conversion
11093 if (SrcVT.isInteger()) {
11094 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
11095 // First do a narrowing conversion to an FP type half the size, then round
11096 // the FP type to a smaller FP type if needed.
11097
11098 MVT InterimFVT = DstVT;
11099 if (SrcEltSize > (2 * DstEltSize)) {
11100 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
11101 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
11102 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
11103 }
11104
11105 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
11106
11107 if (InterimFVT != DstVT) {
11108 Src = Result;
11109 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
11110 }
11111 } else {
11112 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
11113 "Wrong input/output vector types");
11114 // First do a narrowing conversion to an integer half the size, then
11115 // truncate if needed.
11116
11117 if (DstEltSize == 1) {
11118 // First convert to the same size integer, then convert to mask using
11119 // setcc.
11120 assert(SrcEltSize >= 16 && "Unexpected FP type!");
11121 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
11122 DstVT.getVectorElementCount());
11123 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11124
11125 // Compare the integer result to 0. The integer should be 0 or 1/-1,
11126 // otherwise the conversion was undefined.
11127 MVT XLenVT = Subtarget.getXLenVT();
11128 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
11129 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
11130 DAG.getUNDEF(InterimIVT), SplatZero, VL);
11131 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
11132 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
11133 DAG.getUNDEF(DstVT), Mask, VL});
11134 } else {
11135 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11136 DstVT.getVectorElementCount());
11137
11138 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
11139
11140 while (InterimIVT != DstVT) {
11141 SrcEltSize /= 2;
11142 Src = Result;
11143 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
11144 DstVT.getVectorElementCount());
11145 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
11146 Src, Mask, VL);
11147 }
11148 }
11149 }
11150 }
11151
11152 MVT VT = Op.getSimpleValueType();
11153 if (!VT.isFixedLengthVector())
11154 return Result;
11155 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11156}
11157
11158SDValue
11159RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
11160 SelectionDAG &DAG) const {
11161 SDLoc DL(Op);
11162
11163 SDValue Op1 = Op.getOperand(0);
11164 SDValue Op2 = Op.getOperand(1);
11165 SDValue Offset = Op.getOperand(2);
11166 SDValue Mask = Op.getOperand(3);
11167 SDValue EVL1 = Op.getOperand(4);
11168 SDValue EVL2 = Op.getOperand(5);
11169
11170 const MVT XLenVT = Subtarget.getXLenVT();
11171 MVT VT = Op.getSimpleValueType();
11172 MVT ContainerVT = VT;
11173 if (VT.isFixedLengthVector()) {
11174 ContainerVT = getContainerForFixedLengthVector(VT);
11175 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11176 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11177 MVT MaskVT = getMaskTypeFor(ContainerVT);
11178 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11179 }
11180
11181 // EVL1 may need to be extended to XLenVT with RV64LegalI32.
11182 EVL1 = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EVL1);
11183
11184 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
11185 if (IsMaskVector) {
11186 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
11187
11188 // Expand input operands
11189 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11190 DAG.getUNDEF(ContainerVT),
11191 DAG.getConstant(1, DL, XLenVT), EVL1);
11192 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11193 DAG.getUNDEF(ContainerVT),
11194 DAG.getConstant(0, DL, XLenVT), EVL1);
11195 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
11196 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
11197
11198 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11199 DAG.getUNDEF(ContainerVT),
11200 DAG.getConstant(1, DL, XLenVT), EVL2);
11201 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11202 DAG.getUNDEF(ContainerVT),
11203 DAG.getConstant(0, DL, XLenVT), EVL2);
11204 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
11205 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
11206 }
11207
11208 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
11209 SDValue DownOffset, UpOffset;
11210 if (ImmValue >= 0) {
11211 // The operand is a TargetConstant; we need to rebuild it as a regular
11212 // constant.
11213 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11214 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
11215 } else {
11216 // The operand is a TargetConstant; we need to rebuild it as a regular
11217 // constant rather than negating the original operand.
11218 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11219 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
11220 }
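// Illustrative example: with EVL1 == 8 and Offset == 3, Op1 is slid down by
// DownOffset == 3 and Op2 is slid up starting at UpOffset == 8 - 3 == 5, so
// the result is elements [3, 8) of Op1 followed by the leading elements of Op2.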
11221
11222 SDValue SlideDown =
11223 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11224 Op1, DownOffset, Mask, UpOffset);
11225 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
11226 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
11227
11228 if (IsMaskVector) {
11229 // Truncate Result back to a mask vector (Result has same EVL as Op2)
11230 Result = DAG.getNode(
11231 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
11232 {Result, DAG.getConstant(0, DL, ContainerVT),
11233 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
11234 Mask, EVL2});
11235 }
11236
11237 if (!VT.isFixedLengthVector())
11238 return Result;
11239 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11240}
11241
11242SDValue
11243RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
11244 SelectionDAG &DAG) const {
11245 SDLoc DL(Op);
11246 MVT VT = Op.getSimpleValueType();
11247 MVT XLenVT = Subtarget.getXLenVT();
11248
11249 SDValue Op1 = Op.getOperand(0);
11250 SDValue Mask = Op.getOperand(1);
11251 SDValue EVL = Op.getOperand(2);
11252
11253 MVT ContainerVT = VT;
11254 if (VT.isFixedLengthVector()) {
11255 ContainerVT = getContainerForFixedLengthVector(VT);
11256 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11257 MVT MaskVT = getMaskTypeFor(ContainerVT);
11258 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11259 }
11260
11261 MVT GatherVT = ContainerVT;
11262 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
11263 // Check if we are working with mask vectors
11264 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
11265 if (IsMaskVector) {
11266 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
11267
11268 // Expand input operand
11269 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11270 DAG.getUNDEF(IndicesVT),
11271 DAG.getConstant(1, DL, XLenVT), EVL);
11272 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11273 DAG.getUNDEF(IndicesVT),
11274 DAG.getConstant(0, DL, XLenVT), EVL);
11275 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
11276 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
11277 }
11278
11279 unsigned EltSize = GatherVT.getScalarSizeInBits();
11280 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
11281 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11282 unsigned MaxVLMAX =
11283 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11284
11285 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11286 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
11287 // to use vrgatherei16.vv.
11288 // TODO: It's also possible to use vrgatherei16.vv for other types to
11289 // decrease register width for the index calculation.
11290 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
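// Rationale: vrgather.vv indices have the same SEW as the data, so 8-bit
// indices can only address 256 source elements; vrgatherei16.vv keeps 16-bit
// indices regardless of the data SEW.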
11291 if (MaxVLMAX > 256 && EltSize == 8) {
11292 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
11293 // Split the vector in half and reverse each half using a full register
11294 // reverse.
11295 // Swap the halves and concatenate them.
11296 // Slide the concatenated result by (VLMax - VL).
11297 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11298 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
11299 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
11300
11301 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11302 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11303
11304 // Reassemble the low and high pieces reversed.
11305 // NOTE: this Result is unmasked (because we do not need masks for
11306 // shuffles). If in the future this has to change, we can use a SELECT_VL
11307 // between Result and UNDEF using the mask originally passed to VP_REVERSE.
11308 SDValue Result =
11309 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
11310
11311 // Slide off any elements from past EVL that were reversed into the low
11312 // elements.
11313 unsigned MinElts = GatherVT.getVectorMinNumElements();
11314 SDValue VLMax =
11315 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
11316 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
11317
11318 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
11319 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
11320
11321 if (IsMaskVector) {
11322 // Truncate Result back to a mask vector
11323 Result =
11324 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
11325 {Result, DAG.getConstant(0, DL, GatherVT),
11326 DAG.getCondCode(ISD::SETNE),
11327 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11328 }
11329
11330 if (!VT.isFixedLengthVector())
11331 return Result;
11332 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11333 }
11334
11335 // Just promote the int type to i16 which will double the LMUL.
11336 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
11337 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11338 }
11339
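// General case: build the index vector (EVL - 1) - vid and gather through it,
// so that lane i of the result reads lane EVL - 1 - i of the source.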
11340 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
11341 SDValue VecLen =
11342 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
11343 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
11344 DAG.getUNDEF(IndicesVT), VecLen, EVL);
11345 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
11346 DAG.getUNDEF(IndicesVT), Mask, EVL);
11347 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
11348 DAG.getUNDEF(GatherVT), Mask, EVL);
11349
11350 if (IsMaskVector) {
11351 // Truncate Result back to a mask vector
11352 Result = DAG.getNode(
11353 RISCVISD::SETCC_VL, DL, ContainerVT,
11354 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
11355 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
11356 }
11357
11358 if (!VT.isFixedLengthVector())
11359 return Result;
11360 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11361}
11362
11363SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
11364 SelectionDAG &DAG) const {
11365 MVT VT = Op.getSimpleValueType();
11366 if (VT.getVectorElementType() != MVT::i1)
11367 return lowerVPOp(Op, DAG);
11368
11369 // It is safe to drop mask parameter as masked-off elements are undef.
11370 SDValue Op1 = Op->getOperand(0);
11371 SDValue Op2 = Op->getOperand(1);
11372 SDValue VL = Op->getOperand(3);
11373
11374 MVT ContainerVT = VT;
11375 const bool IsFixed = VT.isFixedLengthVector();
11376 if (IsFixed) {
11377 ContainerVT = getContainerForFixedLengthVector(VT);
11378 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11379 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11380 }
11381
11382 SDLoc DL(Op);
11383 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
11384 if (!IsFixed)
11385 return Val;
11386 return convertFromScalableVector(VT, Val, DAG, Subtarget);
11387}
11388
11389SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
11390 SelectionDAG &DAG) const {
11391 SDLoc DL(Op);
11392 MVT XLenVT = Subtarget.getXLenVT();
11393 MVT VT = Op.getSimpleValueType();
11394 MVT ContainerVT = VT;
11395 if (VT.isFixedLengthVector())
11396 ContainerVT = getContainerForFixedLengthVector(VT);
11397
11398 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11399
11400 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
11401 // Check if the mask is known to be all ones
11402 SDValue Mask = VPNode->getMask();
11403 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11404
11405 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
11406 : Intrinsic::riscv_vlse_mask,
11407 DL, XLenVT);
11408 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
11409 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
11410 VPNode->getStride()};
11411 if (!IsUnmasked) {
11412 if (VT.isFixedLengthVector()) {
11413 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11414 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11415 }
11416 Ops.push_back(Mask);
11417 }
11418 Ops.push_back(VPNode->getVectorLength());
11419 if (!IsUnmasked) {
11420 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
11421 Ops.push_back(Policy);
11422 }
11423
11424 SDValue Result =
11425 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11426 VPNode->getMemoryVT(), VPNode->getMemOperand());
11427 SDValue Chain = Result.getValue(1);
11428
11429 if (VT.isFixedLengthVector())
11430 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11431
11432 return DAG.getMergeValues({Result, Chain}, DL);
11433}
11434
11435SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
11436 SelectionDAG &DAG) const {
11437 SDLoc DL(Op);
11438 MVT XLenVT = Subtarget.getXLenVT();
11439
11440 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
11441 SDValue StoreVal = VPNode->getValue();
11442 MVT VT = StoreVal.getSimpleValueType();
11443 MVT ContainerVT = VT;
11444 if (VT.isFixedLengthVector()) {
11445 ContainerVT = getContainerForFixedLengthVector(VT);
11446 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11447 }
11448
11449 // Check if the mask is known to be all ones
11450 SDValue Mask = VPNode->getMask();
11451 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11452
11453 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
11454 : Intrinsic::riscv_vsse_mask,
11455 DL, XLenVT);
11456 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
11457 VPNode->getBasePtr(), VPNode->getStride()};
11458 if (!IsUnmasked) {
11459 if (VT.isFixedLengthVector()) {
11460 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
11461 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11462 }
11463 Ops.push_back(Mask);
11464 }
11465 Ops.push_back(VPNode->getVectorLength());
11466
11467 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
11468 Ops, VPNode->getMemoryVT(),
11469 VPNode->getMemOperand());
11470}
11471
11472// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
11473// matched to a RVV indexed load. The RVV indexed load instructions only
11474// support the "unsigned unscaled" addressing mode; indices are implicitly
11475// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11476// signed or scaled indexing is extended to the XLEN value type and scaled
11477// accordingly.
11478SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
11479 SelectionDAG &DAG) const {
11480 SDLoc DL(Op);
11481 MVT VT = Op.getSimpleValueType();
11482
11483 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11484 EVT MemVT = MemSD->getMemoryVT();
11485 MachineMemOperand *MMO = MemSD->getMemOperand();
11486 SDValue Chain = MemSD->getChain();
11487 SDValue BasePtr = MemSD->getBasePtr();
11488
11489 [[maybe_unused]] ISD::LoadExtType LoadExtType;
11490 SDValue Index, Mask, PassThru, VL;
11491
11492 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
11493 Index = VPGN->getIndex();
11494 Mask = VPGN->getMask();
11495 PassThru = DAG.getUNDEF(VT);
11496 VL = VPGN->getVectorLength();
11497 // VP doesn't support extending loads.
11498 LoadExtType = ISD::NON_EXTLOAD;
11499 } else {
11500 // Else it must be a MGATHER.
11501 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
11502 Index = MGN->getIndex();
11503 Mask = MGN->getMask();
11504 PassThru = MGN->getPassThru();
11505 LoadExtType = MGN->getExtensionType();
11506 }
11507
11508 MVT IndexVT = Index.getSimpleValueType();
11509 MVT XLenVT = Subtarget.getXLenVT();
11510
11512 "Unexpected VTs!");
11513 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11514 // Targets have to explicitly opt-in for extending vector loads.
11515 assert(LoadExtType == ISD::NON_EXTLOAD &&
11516 "Unexpected extending MGATHER/VP_GATHER");
11517
11518 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11519 // the selection of the masked intrinsics doesn't do this for us.
11520 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11521
11522 MVT ContainerVT = VT;
11523 if (VT.isFixedLengthVector()) {
11524 ContainerVT = getContainerForFixedLengthVector(VT);
11525 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11526 ContainerVT.getVectorElementCount());
11527
11528 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11529
11530 if (!IsUnmasked) {
11531 MVT MaskVT = getMaskTypeFor(ContainerVT);
11532 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11533 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11534 }
11535 }
11536
11537 if (!VL)
11538 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11539
11540 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11541 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11542 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11543 }
11544
11545 unsigned IntID =
11546 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
11547 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11548 if (IsUnmasked)
11549 Ops.push_back(DAG.getUNDEF(ContainerVT));
11550 else
11551 Ops.push_back(PassThru);
11552 Ops.push_back(BasePtr);
11553 Ops.push_back(Index);
11554 if (!IsUnmasked)
11555 Ops.push_back(Mask);
11556 Ops.push_back(VL);
11557 if (!IsUnmasked)
11558 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11559
11560 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11561 SDValue Result =
11562 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11563 Chain = Result.getValue(1);
11564
11565 if (VT.isFixedLengthVector())
11566 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11567
11568 return DAG.getMergeValues({Result, Chain}, DL);
11569}
11570
11571// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
11572// matched to a RVV indexed store. The RVV indexed store instructions only
11573// support the "unsigned unscaled" addressing mode; indices are implicitly
11574// zero-extended or truncated to XLEN and are treated as byte offsets. Any
11575// signed or scaled indexing is extended to the XLEN value type and scaled
11576// accordingly.
11577SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
11578 SelectionDAG &DAG) const {
11579 SDLoc DL(Op);
11580 const auto *MemSD = cast<MemSDNode>(Op.getNode());
11581 EVT MemVT = MemSD->getMemoryVT();
11582 MachineMemOperand *MMO = MemSD->getMemOperand();
11583 SDValue Chain = MemSD->getChain();
11584 SDValue BasePtr = MemSD->getBasePtr();
11585
11586 [[maybe_unused]] bool IsTruncatingStore = false;
11587 SDValue Index, Mask, Val, VL;
11588
11589 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
11590 Index = VPSN->getIndex();
11591 Mask = VPSN->getMask();
11592 Val = VPSN->getValue();
11593 VL = VPSN->getVectorLength();
11594 // VP doesn't support truncating stores.
11595 IsTruncatingStore = false;
11596 } else {
11597 // Else it must be a MSCATTER.
11598 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
11599 Index = MSN->getIndex();
11600 Mask = MSN->getMask();
11601 Val = MSN->getValue();
11602 IsTruncatingStore = MSN->isTruncatingStore();
11603 }
11604
11605 MVT VT = Val.getSimpleValueType();
11606 MVT IndexVT = Index.getSimpleValueType();
11607 MVT XLenVT = Subtarget.getXLenVT();
11608
11610 "Unexpected VTs!");
11611 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
11612 // Targets have to explicitly opt-in for extending vector loads and
11613 // truncating vector stores.
11614 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
11615
11616 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
11617 // the selection of the masked intrinsics doesn't do this for us.
11618 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11619
11620 MVT ContainerVT = VT;
11621 if (VT.isFixedLengthVector()) {
11622 ContainerVT = getContainerForFixedLengthVector(VT);
11623 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
11624 ContainerVT.getVectorElementCount());
11625
11626 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
11627 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11628
11629 if (!IsUnmasked) {
11630 MVT MaskVT = getMaskTypeFor(ContainerVT);
11631 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11632 }
11633 }
11634
11635 if (!VL)
11636 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11637
11638 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
11639 IndexVT = IndexVT.changeVectorElementType(XLenVT);
11640 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
11641 }
11642
11643 unsigned IntID =
11644 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
11645 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11646 Ops.push_back(Val);
11647 Ops.push_back(BasePtr);
11648 Ops.push_back(Index);
11649 if (!IsUnmasked)
11650 Ops.push_back(Mask);
11651 Ops.push_back(VL);
11652
11653 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11654 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11655}
11656
11657SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
11658 SelectionDAG &DAG) const {
11659 const MVT XLenVT = Subtarget.getXLenVT();
11660 SDLoc DL(Op);
11661 SDValue Chain = Op->getOperand(0);
11662 SDValue SysRegNo = DAG.getTargetConstant(
11663 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11664 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
11665 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
11666
11667 // Encoding used for rounding mode in RISC-V differs from that used in
11668 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into
11669 // a table, which consists of a sequence of 4-bit fields, each representing the
11670 // corresponding FLT_ROUNDS mode.
11671 static const int Table =
11672 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
11673 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
11674 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
11675 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
11676 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
11677
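// For example, given the table layout above, FRM == RTZ (1) yields a shift of
// 4, and (Table >> 4) & 7 == int(RoundingMode::TowardZero) == 0, which is the
// FLT_ROUNDS encoding for round-toward-zero.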
11678 SDValue Shift =
11679 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
11680 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11681 DAG.getConstant(Table, DL, XLenVT), Shift);
11682 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11683 DAG.getConstant(7, DL, XLenVT));
11684
11685 return DAG.getMergeValues({Masked, Chain}, DL);
11686}
11687
11688SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
11689 SelectionDAG &DAG) const {
11690 const MVT XLenVT = Subtarget.getXLenVT();
11691 SDLoc DL(Op);
11692 SDValue Chain = Op->getOperand(0);
11693 SDValue RMValue = Op->getOperand(1);
11694 SDValue SysRegNo = DAG.getTargetConstant(
11695 RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
11696
11697 // Encoding used for rounding mode in RISC-V differs from that used in
11698 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
11699 // a table, which consists of a sequence of 4-bit fields, each representing the
11700 // corresponding RISC-V mode.
11701 static const unsigned Table =
11702 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
11703 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
11704 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
11705 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
11706 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
11707
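// For example, given the table layout above, an incoming FLT_ROUNDS value of
// int(RoundingMode::TowardZero) == 0 selects field 0, which holds
// RISCVFPRndMode::RTZ (1), the value then written to FRM.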
11708 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
11709
11710 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
11711 DAG.getConstant(2, DL, XLenVT));
11712 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
11713 DAG.getConstant(Table, DL, XLenVT), Shift);
11714 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
11715 DAG.getConstant(0x7, DL, XLenVT));
11716 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
11717 RMValue);
11718}
11719
11720SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
11721 SelectionDAG &DAG) const {
11722 MachineFunction &MF = DAG.getMachineFunction();
11723
11724 bool isRISCV64 = Subtarget.is64Bit();
11725 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11726
11727 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
11728 return DAG.getFrameIndex(FI, PtrVT);
11729}
11730
11731// Returns the opcode of the target-specific SDNode that implements the 32-bit
11732// form of the given Opcode.
11733static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
11734 switch (Opcode) {
11735 default:
11736 llvm_unreachable("Unexpected opcode");
11737 case ISD::SHL:
11738 return RISCVISD::SLLW;
11739 case ISD::SRA:
11740 return RISCVISD::SRAW;
11741 case ISD::SRL:
11742 return RISCVISD::SRLW;
11743 case ISD::SDIV:
11744 return RISCVISD::DIVW;
11745 case ISD::UDIV:
11746 return RISCVISD::DIVUW;
11747 case ISD::UREM:
11748 return RISCVISD::REMUW;
11749 case ISD::ROTL:
11750 return RISCVISD::ROLW;
11751 case ISD::ROTR:
11752 return RISCVISD::RORW;
11753 }
11754}
11755
11756// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
11757 // node. Because i8/i16/i32 aren't legal types for RV64, these operations would
11758 // otherwise be promoted to i64, making it difficult to select the
11759 // SLLW/DIVUW/.../*W later on, because the fact that the operation was originally of
11760// type i8/i16/i32 is lost.
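// For example, on RV64 an (i32 (srl X, Y)) handled here is re-emitted as
// (trunc (i64 (SRLW (anyext X), (anyext Y)))), allowing isel to pick srlw.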
11761 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
11762 unsigned ExtOpc = ISD::ANY_EXTEND) {
11763 SDLoc DL(N);
11764 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
11765 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
11766 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
11767 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
11768 // ReplaceNodeResults requires we maintain the same type for the return value.
11769 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
11770}
11771
11772 // Converts the given 32-bit operation to an i64 operation with sign-extension
11773 // semantics, reducing the number of explicit sign-extension instructions needed.
11774 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
11775 SDLoc DL(N);
11776 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11777 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
11778 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
11779 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
11780 DAG.getValueType(MVT::i32));
11781 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
11782}
11783
11784 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
11785 SmallVectorImpl<SDValue> &Results,
11786 SelectionDAG &DAG) const {
11787 SDLoc DL(N);
11788 switch (N->getOpcode()) {
11789 default:
11790 llvm_unreachable("Don't know how to custom type legalize this operation!");
11791 case ISD::STRICT_FP_TO_SINT:
11792 case ISD::STRICT_FP_TO_UINT:
11793 case ISD::FP_TO_SINT:
11794 case ISD::FP_TO_UINT: {
11795 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11796 "Unexpected custom legalisation");
11797 bool IsStrict = N->isStrictFPOpcode();
11798 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
11799 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
11800 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
11801 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11802 TargetLowering::TypeSoftenFloat) {
11803 if (!isTypeLegal(Op0.getValueType()))
11804 return;
11805 if (IsStrict) {
11806 SDValue Chain = N->getOperand(0);
11807 // In the absence of Zfh, promote f16 to f32, then convert.
11808 if (Op0.getValueType() == MVT::f16 &&
11809 !Subtarget.hasStdExtZfhOrZhinx()) {
11810 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
11811 {Chain, Op0});
11812 Chain = Op0.getValue(1);
11813 }
11814 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
11815 : RISCVISD::STRICT_FCVT_WU_RV64;
11816 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
11817 SDValue Res = DAG.getNode(
11818 Opc, DL, VTs, Chain, Op0,
11819 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11820 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11821 Results.push_back(Res.getValue(1));
11822 return;
11823 }
11824 // For bf16, or for f16 in the absence of Zfh, promote [b]f16 to f32 and then
11825 // convert.
11826 if ((Op0.getValueType() == MVT::f16 &&
11827 !Subtarget.hasStdExtZfhOrZhinx()) ||
11828 Op0.getValueType() == MVT::bf16)
11829 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11830
11831 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
11832 SDValue Res =
11833 DAG.getNode(Opc, DL, MVT::i64, Op0,
11834 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
11835 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11836 return;
11837 }
11838 // If the FP type needs to be softened, emit a library call using the 'si'
11839 // version. If we left it to default legalization we'd end up with 'di'. If
11840 // the FP type doesn't need to be softened just let generic type
11841 // legalization promote the result type.
11842 RTLIB::Libcall LC;
11843 if (IsSigned)
11844 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
11845 else
11846 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
11847 MakeLibCallOptions CallOptions;
11848 EVT OpVT = Op0.getValueType();
11849 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
11850 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
11851 SDValue Result;
11852 std::tie(Result, Chain) =
11853 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
11854 Results.push_back(Result);
11855 if (IsStrict)
11856 Results.push_back(Chain);
11857 break;
11858 }
11859 case ISD::LROUND: {
11860 SDValue Op0 = N->getOperand(0);
11861 EVT Op0VT = Op0.getValueType();
11862 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
11863 TargetLowering::TypeSoftenFloat) {
11864 if (!isTypeLegal(Op0VT))
11865 return;
11866
11867 // In the absence of Zfh, promote f16 to f32, then convert.
11868 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
11869 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
11870
11871 SDValue Res =
11872 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
11873 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
11874 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
11875 return;
11876 }
11877 // If the FP type needs to be softened, emit a library call to lround. We'll
11878 // need to truncate the result. We assume any value that doesn't fit in i32
11879 // is allowed to return an unspecified value.
11880 RTLIB::Libcall LC =
11881 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
11882 MakeLibCallOptions CallOptions;
11883 EVT OpVT = Op0.getValueType();
11884 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
11885 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
11886 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
11887 Results.push_back(Result);
11888 break;
11889 }
11890 case ISD::READCYCLECOUNTER:
11891 case ISD::READSTEADYCOUNTER: {
11892 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
11893 "has custom type legalization on riscv32");
11894
11895 SDValue LoCounter, HiCounter;
11896 MVT XLenVT = Subtarget.getXLenVT();
11897 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
11898 LoCounter = DAG.getTargetConstant(
11899 RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding, DL, XLenVT);
11900 HiCounter = DAG.getTargetConstant(
11901 RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding, DL, XLenVT);
11902 } else {
11903 LoCounter = DAG.getTargetConstant(
11904 RISCVSysReg::lookupSysRegByName("TIME")->Encoding, DL, XLenVT);
11905 HiCounter = DAG.getTargetConstant(
11906 RISCVSysReg::lookupSysRegByName("TIMEH")->Encoding, DL, XLenVT);
11907 }
11908 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11909 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
11910 N->getOperand(0), LoCounter, HiCounter);
11911
11912 Results.push_back(
11913 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
11914 Results.push_back(RCW.getValue(2));
11915 break;
11916 }
11917 case ISD::LOAD: {
11918 if (!ISD::isNON_EXTLoad(N))
11919 return;
11920
11921 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
11922 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
11923 LoadSDNode *Ld = cast<LoadSDNode>(N);
11924
11925 SDLoc dl(N);
11926 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
11927 Ld->getBasePtr(), Ld->getMemoryVT(),
11928 Ld->getMemOperand());
11929 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
11930 Results.push_back(Res.getValue(1));
11931 return;
11932 }
11933 case ISD::MUL: {
11934 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
11935 unsigned XLen = Subtarget.getXLen();
12036 // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
11937 if (Size > XLen) {
11938 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
11939 SDValue LHS = N->getOperand(0);
11940 SDValue RHS = N->getOperand(1);
11941 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
11942
11943 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
11944 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
11945 // We need exactly one side to be unsigned.
11946 if (LHSIsU == RHSIsU)
11947 return;
11948
11949 auto MakeMULPair = [&](SDValue S, SDValue U) {
11950 MVT XLenVT = Subtarget.getXLenVT();
11951 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
11952 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
11953 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
11954 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
11955 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
11956 };
11957
11958 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
11959 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
11960
11961 // The other operand should be signed, but still prefer MULH when
11962 // possible.
11963 if (RHSIsU && LHSIsS && !RHSIsS)
11964 Results.push_back(MakeMULPair(LHS, RHS));
11965 else if (LHSIsU && RHSIsS && !LHSIsS)
11966 Results.push_back(MakeMULPair(RHS, LHS));
11967
11968 return;
11969 }
11970 [[fallthrough]];
11971 }
11972 case ISD::ADD:
11973 case ISD::SUB:
11974 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11975 "Unexpected custom legalisation");
11976 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
11977 break;
11978 case ISD::SHL:
11979 case ISD::SRA:
11980 case ISD::SRL:
11981 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
11982 "Unexpected custom legalisation");
11983 if (N->getOperand(1).getOpcode() != ISD::Constant) {
11984 // If we can use a BSET instruction, allow default promotion to apply.
11985 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
11986 isOneConstant(N->getOperand(0)))
11987 break;
11988 Results.push_back(customLegalizeToWOp(N, DAG));
11989 break;
11990 }
11991
11992 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
11993 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
11994 // shift amount.
11995 if (N->getOpcode() == ISD::SHL) {
11996 SDLoc DL(N);
11997 SDValue NewOp0 =
11998 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
11999 SDValue NewOp1 =
12000 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
12001 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
12002 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12003 DAG.getValueType(MVT::i32));
12004 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12005 }
12006
12007 break;
12008 case ISD::ROTL:
12009 case ISD::ROTR:
12010 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12011 "Unexpected custom legalisation");
12012 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
12013 Subtarget.hasVendorXTHeadBb()) &&
12014 "Unexpected custom legalization");
12015 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
12016 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
12017 return;
12018 Results.push_back(customLegalizeToWOp(N, DAG));
12019 break;
12020 case ISD::CTTZ:
12021 case ISD::CTTZ_ZERO_UNDEF:
12022 case ISD::CTLZ:
12023 case ISD::CTLZ_ZERO_UNDEF: {
12024 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12025 "Unexpected custom legalisation");
12026
12027 SDValue NewOp0 =
12028 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12029 bool IsCTZ =
12030 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
12031 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
12032 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
12033 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12034 return;
12035 }
12036 case ISD::SDIV:
12037 case ISD::UDIV:
12038 case ISD::UREM: {
12039 MVT VT = N->getSimpleValueType(0);
12040 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
12041 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
12042 "Unexpected custom legalisation");
12043 // Don't promote division/remainder by constant since we should expand those
12044 // to multiply by magic constant.
12045 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12046 if (N->getOperand(1).getOpcode() == ISD::Constant &&
12047 !isIntDivCheap(N->getValueType(0), Attr))
12048 return;
12049
12050 // If the input is i32, use ANY_EXTEND since the W instructions don't read
12051 // the upper 32 bits. For other types we need to sign or zero extend
12052 // based on the opcode.
12053 unsigned ExtOpc = ISD::ANY_EXTEND;
12054 if (VT != MVT::i32)
12055 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
12056 : ISD::ZERO_EXTEND;
12057
12058 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
12059 break;
12060 }
12061 case ISD::SADDO: {
12062 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12063 "Unexpected custom legalisation");
12064
12065 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
12066 // use the default legalization.
12067 if (!isa<ConstantSDNode>(N->getOperand(1)))
12068 return;
12069
12070 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12071 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12072 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
12073 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12074 DAG.getValueType(MVT::i32));
12075
12076 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
12077
12078 // For an addition, the result should be less than one of the operands (LHS)
12079 // if and only if the other operand (RHS) is negative, otherwise there will
12080 // be overflow.
12081 // For a subtraction, the result should be less than one of the operands
12082 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
12083 // otherwise there will be overflow.
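// Worked example (i32): LHS == 0x7fffffff, RHS == 1. The sign-extended sum
// wraps to a negative Res, so Res < LHS is true while RHS < 0 is false, and
// the XOR below correctly reports overflow.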
12084 EVT OType = N->getValueType(1);
12085 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
12086 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
12087
12088 SDValue Overflow =
12089 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
12090 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12091 Results.push_back(Overflow);
12092 return;
12093 }
12094 case ISD::UADDO:
12095 case ISD::USUBO: {
12096 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12097 "Unexpected custom legalisation");
12098 bool IsAdd = N->getOpcode() == ISD::UADDO;
12099 // Create an ADDW or SUBW.
12100 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12101 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12102 SDValue Res =
12103 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
12104 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
12105 DAG.getValueType(MVT::i32));
12106
12107 SDValue Overflow;
12108 if (IsAdd && isOneConstant(RHS)) {
12109 // Special case uaddo X, 1 overflowed if the addition result is 0.
12110 // The general case (X + C) < C is not necessarily beneficial. Although we
12111 // reduce the live range of X, we may introduce the materialization of
12112 // constant C, especially when the setcc result is used by a branch, since
12113 // RISC-V has no compare-with-constant-and-branch instructions.
12114 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
12115 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
12116 } else if (IsAdd && isAllOnesConstant(RHS)) {
12117 // Special case uaddo X, -1 overflowed if X != 0.
12118 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
12119 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
12120 } else {
12121 // Sign extend the LHS and perform an unsigned compare with the ADDW
12122 // result. Since the inputs are sign extended from i32, this is equivalent
12123 // to comparing the lower 32 bits.
12124 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12125 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
12126 IsAdd ? ISD::SETULT : ISD::SETUGT);
12127 }
12128
12129 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12130 Results.push_back(Overflow);
12131 return;
12132 }
12133 case ISD::UADDSAT:
12134 case ISD::USUBSAT: {
12135 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12136 "Unexpected custom legalisation");
12137 if (Subtarget.hasStdExtZbb()) {
12138 // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
12139 // sign extend allows overflow of the lower 32 bits to be detected on
12140 // the promoted size.
12141 SDValue LHS =
12142 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
12143 SDValue RHS =
12144 DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
12145 SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
12146 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12147 return;
12148 }
12149
12150 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
12151 // promotion for UADDO/USUBO.
12152 Results.push_back(expandAddSubSat(N, DAG));
12153 return;
12154 }
12155 case ISD::SADDSAT:
12156 case ISD::SSUBSAT: {
12157 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12158 "Unexpected custom legalisation");
12159 Results.push_back(expandAddSubSat(N, DAG));
12160 return;
12161 }
12162 case ISD::ABS: {
12163 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12164 "Unexpected custom legalisation");
12165
12166 if (Subtarget.hasStdExtZbb()) {
12167 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
12168 // This allows us to remember that the result is sign extended. Expanding
12169 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
12170 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
12171 N->getOperand(0));
12172 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
12173 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
12174 return;
12175 }
12176
12177 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
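// For example, X = -5: Y = -1 (all sign bits), xor(X, Y) = 4, 4 - (-1) = 5.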
12178 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12179
12180 // Freeze the source so we can increase its use count.
12181 Src = DAG.getFreeze(Src);
12182
12183 // Copy sign bit to all bits using the sraiw pattern.
12184 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
12185 DAG.getValueType(MVT::i32));
12186 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
12187 DAG.getConstant(31, DL, MVT::i64));
12188
12189 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
12190 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
12191
12192 // NOTE: The result is only required to be anyextended, but sext is
12193 // consistent with type legalization of sub.
12194 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
12195 DAG.getValueType(MVT::i32));
12196 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
12197 return;
12198 }
12199 case ISD::BITCAST: {
12200 EVT VT = N->getValueType(0);
12201 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
12202 SDValue Op0 = N->getOperand(0);
12203 EVT Op0VT = Op0.getValueType();
12204 MVT XLenVT = Subtarget.getXLenVT();
12205 if (VT == MVT::i16 && Op0VT == MVT::f16 &&
12206 Subtarget.hasStdExtZfhminOrZhinxmin()) {
12207 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12208 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12209 } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
12210 Subtarget.hasStdExtZfbfmin()) {
12211 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
12212 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
12213 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
12214 Subtarget.hasStdExtFOrZfinx()) {
12215 SDValue FPConv =
12216 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
12217 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
12218 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32) {
12219 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
12220 DAG.getVTList(MVT::i32, MVT::i32), Op0);
12221 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
12222 NewReg.getValue(0), NewReg.getValue(1));
12223 Results.push_back(RetReg);
12224 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
12225 isTypeLegal(Op0VT)) {
12226 // Custom-legalize bitcasts from fixed-length vector types to illegal
12227 // scalar types in order to improve codegen. Bitcast the vector to a
12228 // one-element vector type whose element type is the same as the result
12229 // type, and extract the first element.
12230 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
12231 if (isTypeLegal(BVT)) {
12232 SDValue BVec = DAG.getBitcast(BVT, Op0);
12233 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
12234 DAG.getVectorIdxConstant(0, DL)));
12235 }
12236 }
12237 break;
12238 }
12239 case RISCVISD::BREV8: {
12240 MVT VT = N->getSimpleValueType(0);
12241 MVT XLenVT = Subtarget.getXLenVT();
12242 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
12243 "Unexpected custom legalisation");
12244 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
12245 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
12246 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
12247 // ReplaceNodeResults requires we maintain the same type for the return
12248 // value.
12249 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
12250 break;
12251 }
12252 case ISD::EXTRACT_VECTOR_ELT: {
12253 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
12254 // type is illegal (currently only vXi64 RV32).
12255 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
12256 // transferred to the destination register. We issue two of these from the
12257 // upper- and lower- halves of the SEW-bit vector element, slid down to the
12258 // first element.
12259 SDValue Vec = N->getOperand(0);
12260 SDValue Idx = N->getOperand(1);
12261
12262 // The vector type hasn't been legalized yet so we can't issue target
12263 // specific nodes if it needs legalization.
12264 // FIXME: We would manually legalize if it's important.
12265 if (!isTypeLegal(Vec.getValueType()))
12266 return;
12267
12268 MVT VecVT = Vec.getSimpleValueType();
12269
12270 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
12271 VecVT.getVectorElementType() == MVT::i64 &&
12272 "Unexpected EXTRACT_VECTOR_ELT legalization");
12273
12274 // If this is a fixed vector, we need to convert it to a scalable vector.
12275 MVT ContainerVT = VecVT;
12276 if (VecVT.isFixedLengthVector()) {
12277 ContainerVT = getContainerForFixedLengthVector(VecVT);
12278 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12279 }
12280
12281 MVT XLenVT = Subtarget.getXLenVT();
12282
12283 // Use a VL of 1 to avoid processing more elements than we need.
12284 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
12285
12286 // Unless the index is known to be 0, we must slide the vector down to get
12287 // the desired element into index 0.
12288 if (!isNullConstant(Idx)) {
12289 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12290 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
12291 }
12292
12293 // Extract the lower XLEN bits of the correct vector element.
12294 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12295
12296 // To extract the upper XLEN bits of the vector element, shift the first
12297 // element right by 32 bits and re-extract the lower XLEN bits.
12298 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12299 DAG.getUNDEF(ContainerVT),
12300 DAG.getConstant(32, DL, XLenVT), VL);
12301 SDValue LShr32 =
12302 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
12303 DAG.getUNDEF(ContainerVT), Mask, VL);
12304
12305 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12306
12307 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12308 break;
12309 }
12310 case ISD::INTRINSIC_WO_CHAIN: {
12311 unsigned IntNo = N->getConstantOperandVal(0);
12312 switch (IntNo) {
12313 default:
12315 "Don't know how to custom type legalize this intrinsic!");
12316 case Intrinsic::experimental_get_vector_length: {
12317 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
12318 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12319 return;
12320 }
12321 case Intrinsic::riscv_orc_b:
12322 case Intrinsic::riscv_brev8:
12323 case Intrinsic::riscv_sha256sig0:
12324 case Intrinsic::riscv_sha256sig1:
12325 case Intrinsic::riscv_sha256sum0:
12326 case Intrinsic::riscv_sha256sum1:
12327 case Intrinsic::riscv_sm3p0:
12328 case Intrinsic::riscv_sm3p1: {
12329 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12330 return;
12331 unsigned Opc;
12332 switch (IntNo) {
12333 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
12334 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
12335 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
12336 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
12337 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
12338 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
12339 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
12340 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
12341 }
12342
12343 SDValue NewOp =
12344 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12345 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
12346 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12347 return;
12348 }
12349 case Intrinsic::riscv_sm4ks:
12350 case Intrinsic::riscv_sm4ed: {
12351 unsigned Opc =
12352 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
12353 SDValue NewOp0 =
12354 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12355 SDValue NewOp1 =
12356 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12357 SDValue Res =
12358 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
12359 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12360 return;
12361 }
12362 case Intrinsic::riscv_mopr: {
12363 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12364 return;
12365 SDValue NewOp =
12366 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12367 SDValue Res = DAG.getNode(
12368 RISCVISD::MOPR, DL, MVT::i64, NewOp,
12369 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
12370 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12371 return;
12372 }
12373 case Intrinsic::riscv_moprr: {
12374 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12375 return;
12376 SDValue NewOp0 =
12377 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12378 SDValue NewOp1 =
12379 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12380 SDValue Res = DAG.getNode(
12381 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
12382 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
12383 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12384 return;
12385 }
12386 case Intrinsic::riscv_clmul: {
12387 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12388 return;
12389
12390 SDValue NewOp0 =
12391 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12392 SDValue NewOp1 =
12393 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12394 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
12395 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12396 return;
12397 }
12398 case Intrinsic::riscv_clmulh:
12399 case Intrinsic::riscv_clmulr: {
12400 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
12401 return;
12402
12403 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
12404 // to the full 128-bit clmul result of multiplying two xlen values.
12405 // Perform clmulr or clmulh on the shifted values. Finally, extract the
12406 // upper 32 bits.
12407 //
12408 // The alternative is to mask the inputs to 32 bits and use clmul, but
12409 // that requires two shifts to mask each input without zext.w.
12410 // FIXME: If the inputs are known zero extended or could be freely
12411 // zero extended, the mask form would be better.
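// Why the shift works for clmulh: clmul(a << 32, b << 32) == clmul(a, b) << 64,
// so the upper 64 bits of that product are the low 64 bits of clmul(a, b), and
// shifting right by 32 leaves exactly the 32-bit clmulh result.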
12412 SDValue NewOp0 =
12413 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12414 SDValue NewOp1 =
12415 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
12416 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
12417 DAG.getConstant(32, DL, MVT::i64));
12418 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
12419 DAG.getConstant(32, DL, MVT::i64));
12420 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
12421 : RISCVISD::CLMULR;
12422 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
12423 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
12424 DAG.getConstant(32, DL, MVT::i64));
12425 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12426 return;
12427 }
12428 case Intrinsic::riscv_vmv_x_s: {
12429 EVT VT = N->getValueType(0);
12430 MVT XLenVT = Subtarget.getXLenVT();
12431 if (VT.bitsLT(XLenVT)) {
12432 // Simple case just extract using vmv.x.s and truncate.
12433 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
12434 Subtarget.getXLenVT(), N->getOperand(1));
12435 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
12436 return;
12437 }
12438
12439 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
12440 "Unexpected custom legalization");
12441
12442 // We need to do the move in two steps.
12443 SDValue Vec = N->getOperand(1);
12444 MVT VecVT = Vec.getSimpleValueType();
12445
12446 // First extract the lower XLEN bits of the element.
12447 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
12448
12449 // To extract the upper XLEN bits of the vector element, shift the first
12450 // element right by 32 bits and re-extract the lower XLEN bits.
12451 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
12452
12453 SDValue ThirtyTwoV =
12454 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
12455 DAG.getConstant(32, DL, XLenVT), VL);
12456 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
12457 DAG.getUNDEF(VecVT), Mask, VL);
12458 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
12459
12460 Results.push_back(
12461 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
12462 break;
12463 }
12464 }
12465 break;
12466 }
12467 case ISD::VECREDUCE_ADD:
12468 case ISD::VECREDUCE_AND:
12469 case ISD::VECREDUCE_OR:
12470 case ISD::VECREDUCE_XOR:
12471 case ISD::VECREDUCE_SMAX:
12472 case ISD::VECREDUCE_UMAX:
12473 case ISD::VECREDUCE_SMIN:
12474 case ISD::VECREDUCE_UMIN:
12475 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
12476 Results.push_back(V);
12477 break;
12478 case ISD::VP_REDUCE_ADD:
12479 case ISD::VP_REDUCE_AND:
12480 case ISD::VP_REDUCE_OR:
12481 case ISD::VP_REDUCE_XOR:
12482 case ISD::VP_REDUCE_SMAX:
12483 case ISD::VP_REDUCE_UMAX:
12484 case ISD::VP_REDUCE_SMIN:
12485 case ISD::VP_REDUCE_UMIN:
12486 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
12487 Results.push_back(V);
12488 break;
12489 case ISD::GET_ROUNDING: {
12490 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
12491 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
12492 Results.push_back(Res.getValue(0));
12493 Results.push_back(Res.getValue(1));
12494 break;
12495 }
12496 }
12497}
12498
12499/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
12500/// which corresponds to it.
12501static unsigned getVecReduceOpcode(unsigned Opc) {
12502 switch (Opc) {
12503 default:
12504 llvm_unreachable("Unhandled binary to transfrom reduction");
12505 case ISD::ADD:
12506 return ISD::VECREDUCE_ADD;
12507 case ISD::UMAX:
12508 return ISD::VECREDUCE_UMAX;
12509 case ISD::SMAX:
12510 return ISD::VECREDUCE_SMAX;
12511 case ISD::UMIN:
12512 return ISD::VECREDUCE_UMIN;
12513 case ISD::SMIN:
12514 return ISD::VECREDUCE_SMIN;
12515 case ISD::AND:
12516 return ISD::VECREDUCE_AND;
12517 case ISD::OR:
12518 return ISD::VECREDUCE_OR;
12519 case ISD::XOR:
12520 return ISD::VECREDUCE_XOR;
12521 case ISD::FADD:
12522 // Note: This is the associative form of the generic reduction opcode.
12523 return ISD::VECREDUCE_FADD;
12524 }
12525}
12526
12527/// Perform two related transforms whose purpose is to incrementally recognize
12528/// an explode_vector followed by scalar reduction as a vector reduction node.
12529/// This exists to recover from a deficiency in SLP which can't handle
12530/// forests with multiple roots sharing common nodes. In some cases, one
12531/// of the trees will be vectorized, and the other will remain (unprofitably)
12532/// scalarized.
12533static SDValue
12534 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
12535 const RISCVSubtarget &Subtarget) {
12536
12537 // This transform needs to run before all integer types have been legalized
12538 // to i64 (so that the vector element type matches the add type), and while
12539 // it's safe to introduce odd sized vector types.
12540 if (DAG.NewNodesMustHaveLegalTypes)
12541 return SDValue();
12542
12543 // Without V, this transform isn't useful. We could form the (illegal)
12544 // operations and let them be scalarized again, but there's really no point.
12545 if (!Subtarget.hasVInstructions())
12546 return SDValue();
12547
12548 const SDLoc DL(N);
12549 const EVT VT = N->getValueType(0);
12550 const unsigned Opc = N->getOpcode();
12551
12552 // For FADD, we only handle the case with reassociation allowed. We
12553 // could handle strict reduction order, but at the moment, there's no
12554 // known reason to, and the complexity isn't worth it.
12555 // TODO: Handle fminnum and fmaxnum here
12556 if (!VT.isInteger() &&
12557 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
12558 return SDValue();
12559
12560 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
12561 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
12562 "Inconsistent mappings");
12563 SDValue LHS = N->getOperand(0);
12564 SDValue RHS = N->getOperand(1);
12565
12566 if (!LHS.hasOneUse() || !RHS.hasOneUse())
12567 return SDValue();
12568
12569 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
12570 std::swap(LHS, RHS);
12571
12572 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
12573 !isa<ConstantSDNode>(RHS.getOperand(1)))
12574 return SDValue();
12575
12576 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
12577 SDValue SrcVec = RHS.getOperand(0);
12578 EVT SrcVecVT = SrcVec.getValueType();
12579 assert(SrcVecVT.getVectorElementType() == VT);
12580 if (SrcVecVT.isScalableVector())
12581 return SDValue();
12582
12583 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
12584 return SDValue();
12585
12586 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
12587 // reduce_op (extract_subvector [2 x VT] from V). This will form the
12588 // root of our reduction tree. TODO: We could extend this to any two
12589 // adjacent aligned constant indices if desired.
12590 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12591 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
12592 uint64_t LHSIdx =
12593 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
12594 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
12595 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
12596 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12597 DAG.getVectorIdxConstant(0, DL));
12598 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
12599 }
12600 }
12601
12602 // Match (binop (reduce (extract_subvector V, 0),
12603 // (extract_vector_elt V, sizeof(SubVec))))
12604 // into a reduction of one more element from the original vector V.
12605 if (LHS.getOpcode() != ReduceOpc)
12606 return SDValue();
12607
12608 SDValue ReduceVec = LHS.getOperand(0);
12609 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12610 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
12611 isNullConstant(ReduceVec.getOperand(1)) &&
12612 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
12613 // For illegal types (e.g. 3xi32), most will be combined again into a
12614 // wider (hopefully legal) type. If this is a terminal state, we are
12615 // relying on type legalization here to produce something reasonable
12616 // and this lowering quality could probably be improved. (TODO)
12617 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
12618 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
12619 DAG.getVectorIdxConstant(0, DL));
12620 auto Flags = ReduceVec->getFlags();
12621 Flags.intersectWith(N->getFlags());
12622 return DAG.getNode(ReduceOpc, DL, VT, Vec, Flags);
12623 }
12624
12625 return SDValue();
12626}
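// Editor's note: the combine above leans on two algebraic identities. Below is
// a minimal standalone constexpr sketch (illustrative only, not part of this
// file; the helper name reducePrefixAdd is invented) that checks them for the
// integer add case.

// binop(extract(v,0), extract(v,1))     == reduce(v[0..2))
// binop(reduce(v[0..k)), extract(v,k))  == reduce(v[0..k+1))
constexpr int reducePrefixAdd(const int *V, unsigned K) {
  int Acc = 0;
  for (unsigned I = 0; I < K; ++I)
    Acc += V[I];
  return Acc;
}
constexpr int ExplodedVec[4] = {5, -2, 7, 11};
// Root of the reduction tree: add of the first two extracted elements.
static_assert(ExplodedVec[0] + ExplodedVec[1] == reducePrefixAdd(ExplodedVec, 2), "");
// Growing the tree: fold one more extracted element into the running reduce.
static_assert(reducePrefixAdd(ExplodedVec, 3) + ExplodedVec[3] == reducePrefixAdd(ExplodedVec, 4), "");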
12627
12628
12629// Try to fold (<bop> x, (reduction.<bop> vec, start))
12630 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
12631 const RISCVSubtarget &Subtarget) {
12632 auto BinOpToRVVReduce = [](unsigned Opc) {
12633 switch (Opc) {
12634 default:
12635 llvm_unreachable("Unhandled binary to transform reduction");
12636 case ISD::ADD:
12637 return RISCVISD::VECREDUCE_ADD_VL;
12638 case ISD::UMAX:
12639 return RISCVISD::VECREDUCE_UMAX_VL;
12640 case ISD::SMAX:
12641 return RISCVISD::VECREDUCE_SMAX_VL;
12642 case ISD::UMIN:
12643 return RISCVISD::VECREDUCE_UMIN_VL;
12644 case ISD::SMIN:
12645 return RISCVISD::VECREDUCE_SMIN_VL;
12646 case ISD::AND:
12647 return RISCVISD::VECREDUCE_AND_VL;
12648 case ISD::OR:
12649 return RISCVISD::VECREDUCE_OR_VL;
12650 case ISD::XOR:
12651 return RISCVISD::VECREDUCE_XOR_VL;
12652 case ISD::FADD:
12653 return RISCVISD::VECREDUCE_FADD_VL;
12654 case ISD::FMAXNUM:
12655 return RISCVISD::VECREDUCE_FMAX_VL;
12656 case ISD::FMINNUM:
12657 return RISCVISD::VECREDUCE_FMIN_VL;
12658 }
12659 };
12660
12661 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
12662 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12663 isNullConstant(V.getOperand(1)) &&
12664 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
12665 };
12666
12667 unsigned Opc = N->getOpcode();
12668 unsigned ReduceIdx;
12669 if (IsReduction(N->getOperand(0), Opc))
12670 ReduceIdx = 0;
12671 else if (IsReduction(N->getOperand(1), Opc))
12672 ReduceIdx = 1;
12673 else
12674 return SDValue();
12675
12676 // Skip if FADD disallows reassociation but the combiner needs it.
12677 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
12678 return SDValue();
12679
12680 SDValue Extract = N->getOperand(ReduceIdx);
12681 SDValue Reduce = Extract.getOperand(0);
12682 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
12683 return SDValue();
12684
12685 SDValue ScalarV = Reduce.getOperand(2);
12686 EVT ScalarVT = ScalarV.getValueType();
12687 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
12688 ScalarV.getOperand(0)->isUndef() &&
12689 isNullConstant(ScalarV.getOperand(2)))
12690 ScalarV = ScalarV.getOperand(1);
12691
12692 // Make sure that ScalarV is a splat with VL=1.
12693 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
12694 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
12695 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
12696 return SDValue();
12697
12698 if (!isNonZeroAVL(ScalarV.getOperand(2)))
12699 return SDValue();
12700
12701 // Check that the scalar of ScalarV is the neutral element.
12702 // TODO: Deal with value other than neutral element.
12703 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
12704 0))
12705 return SDValue();
12706
12707 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
12708 // FIXME: We might be able to improve this if operand 0 is undef.
12709 if (!isNonZeroAVL(Reduce.getOperand(5)))
12710 return SDValue();
12711
12712 SDValue NewStart = N->getOperand(1 - ReduceIdx);
12713
12714 SDLoc DL(N);
12715 SDValue NewScalarV =
12716 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
12717 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
12718
12719 // If we looked through an INSERT_SUBVECTOR we need to restore it.
12720 if (ScalarVT != ScalarV.getValueType())
12721 NewScalarV =
12722 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
12723 NewScalarV, DAG.getVectorIdxConstant(0, DL));
12724
12725 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
12726 NewScalarV, Reduce.getOperand(3),
12727 Reduce.getOperand(4), Reduce.getOperand(5)};
12728 SDValue NewReduce =
12729 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
12730 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
12731 Extract.getOperand(1));
12732}
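// Editor's note: a minimal standalone sketch (illustrative only, not part of
// this file; reduceAdd is an invented helper) of the identity used above --
// the scalar operand can be folded into the start value of the reduction.
constexpr int reduceAdd(const int *V, unsigned N, int Start) {
  int Acc = Start;
  for (unsigned I = 0; I < N; ++I)
    Acc += V[I];
  return Acc;
}
constexpr int RVec[3] = {4, 9, -1};
// x + reduce_add(vec, start) == reduce_add(vec, x + start)
static_assert(10 + reduceAdd(RVec, 3, /*Start=*/0) ==
              reduceAdd(RVec, 3, /*Start=*/10 + 0), "");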
12733
12734// Optimize (add (shl x, c0), (shl y, c1)) ->
12735// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
12736 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
12737 const RISCVSubtarget &Subtarget) {
12738 // Perform this optimization only in the zba extension.
12739 if (!Subtarget.hasStdExtZba())
12740 return SDValue();
12741
12742 // Skip for vector types and larger types.
12743 EVT VT = N->getValueType(0);
12744 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12745 return SDValue();
12746
12747 // The two operand nodes must be SHL and have no other use.
12748 SDValue N0 = N->getOperand(0);
12749 SDValue N1 = N->getOperand(1);
12750 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
12751 !N0->hasOneUse() || !N1->hasOneUse())
12752 return SDValue();
12753
12754 // Check c0 and c1.
12755 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12756 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
12757 if (!N0C || !N1C)
12758 return SDValue();
12759 int64_t C0 = N0C->getSExtValue();
12760 int64_t C1 = N1C->getSExtValue();
12761 if (C0 <= 0 || C1 <= 0)
12762 return SDValue();
12763
12764 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
12765 int64_t Bits = std::min(C0, C1);
12766 int64_t Diff = std::abs(C0 - C1);
12767 if (Diff != 1 && Diff != 2 && Diff != 3)
12768 return SDValue();
12769
12770 // Build nodes.
12771 SDLoc DL(N);
12772 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
12773 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
12774 SDValue NA0 =
12775 DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
12776 SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
12777 return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
12778}
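// Editor's note: a standalone constexpr check (illustrative only, not part of
// this file) of the rewrite above: with Diff = c1 - c0 in [1,3], the inner
// (y << Diff) + x maps onto a single SH1ADD/SH2ADD/SH3ADD from Zba.
constexpr unsigned long long addOfShl(unsigned long long X, unsigned C0,
                                      unsigned long long Y, unsigned C1) {
  return (X << C0) + (Y << C1);
}
// C0 = 4, C1 = 6, Diff = 2: (x<<4) + (y<<6) == ((y<<2) + x) << 4,
// i.e. SLLI(SH2ADD(y, x), 4) once selected.
static_assert(addOfShl(13, 4, 27, 6) == (((27ULL << 2) + 13) << 4), "");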
12779
12780// Combine a constant select operand into its use:
12781//
12782// (and (select cond, -1, c), x)
12783// -> (select cond, x, (and x, c)) [AllOnes=1]
12784// (or (select cond, 0, c), x)
12785// -> (select cond, x, (or x, c)) [AllOnes=0]
12786// (xor (select cond, 0, c), x)
12787// -> (select cond, x, (xor x, c)) [AllOnes=0]
12788// (add (select cond, 0, c), x)
12789// -> (select cond, x, (add x, c)) [AllOnes=0]
12790// (sub x, (select cond, 0, c))
12791// -> (select cond, x, (sub x, c)) [AllOnes=0]
12792 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12793 SelectionDAG &DAG, bool AllOnes,
12794 const RISCVSubtarget &Subtarget) {
12795 EVT VT = N->getValueType(0);
12796
12797 // Skip vectors.
12798 if (VT.isVector())
12799 return SDValue();
12800
12801 if (!Subtarget.hasConditionalMoveFusion()) {
12802 // (select cond, x, (and x, c)) has custom lowering with Zicond.
12803 if ((!Subtarget.hasStdExtZicond() &&
12804 !Subtarget.hasVendorXVentanaCondOps()) ||
12805 N->getOpcode() != ISD::AND)
12806 return SDValue();
12807
12808 // Maybe harmful when the condition code has multiple uses.
12809 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
12810 return SDValue();
12811
12812 // Maybe harmful when VT is wider than XLen.
12813 if (VT.getSizeInBits() > Subtarget.getXLen())
12814 return SDValue();
12815 }
12816
12817 if ((Slct.getOpcode() != ISD::SELECT &&
12818 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
12819 !Slct.hasOneUse())
12820 return SDValue();
12821
12822 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
12823 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12824 };
12825
12826 bool SwapSelectOps;
12827 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
12828 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
12829 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
12830 SDValue NonConstantVal;
12831 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
12832 SwapSelectOps = false;
12833 NonConstantVal = FalseVal;
12834 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
12835 SwapSelectOps = true;
12836 NonConstantVal = TrueVal;
12837 } else
12838 return SDValue();
12839
12840 // Slct is now known to be the desired identity constant when CC is true.
12841 TrueVal = OtherOp;
12842 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
12843 // Unless SwapSelectOps says the condition should be false.
12844 if (SwapSelectOps)
12845 std::swap(TrueVal, FalseVal);
12846
12847 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
12848 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
12849 {Slct.getOperand(0), Slct.getOperand(1),
12850 Slct.getOperand(2), TrueVal, FalseVal});
12851
12852 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12853 {Slct.getOperand(0), TrueVal, FalseVal});
12854}
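// Editor's note: a standalone constexpr check (illustrative only, not part of
// this file) of the first rewrite listed in the comment block above:
// (and (select cond, -1, c), x) == (select cond, x, (and x, c)).
constexpr int selectVal(bool Cond, int T, int F) { return Cond ? T : F; }
constexpr bool checkAndOfSelect(bool Cond, int C, int X) {
  return (selectVal(Cond, -1, C) & X) == selectVal(Cond, X, X & C);
}
static_assert(checkAndOfSelect(true, 0x0f, 0x1234) &&
              checkAndOfSelect(false, 0x0f, 0x1234), "");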
12855
12856// Attempt combineSelectAndUse on each operand of a commutative operator N.
12857 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
12858 bool AllOnes,
12859 const RISCVSubtarget &Subtarget) {
12860 SDValue N0 = N->getOperand(0);
12861 SDValue N1 = N->getOperand(1);
12862 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
12863 return Result;
12864 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
12865 return Result;
12866 return SDValue();
12867}
12868
12869// Transform (add (mul x, c0), c1) ->
12870// (add (mul (add x, c1/c0), c0), c1%c0).
12871// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
12872// that should be excluded is when c0*(c1/c0) is simm12, which will lead
12873// to an infinite loop in DAGCombine if transformed.
12874// Or transform (add (mul x, c0), c1) ->
12875// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
12876// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
12877// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
12878// lead to an infinite loop in DAGCombine if transformed.
12879// Or transform (add (mul x, c0), c1) ->
12880// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
12881// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
12882// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
12883// lead to an infinite loop in DAGCombine if transformed.
12884// Or transform (add (mul x, c0), c1) ->
12885// (mul (add x, c1/c0), c0).
12886// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
12887 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
12888 const RISCVSubtarget &Subtarget) {
12889 // Skip for vector types and larger types.
12890 EVT VT = N->getValueType(0);
12891 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
12892 return SDValue();
12893 // The first operand node must be a MUL and has no other use.
12894 SDValue N0 = N->getOperand(0);
12895 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
12896 return SDValue();
12897 // Check if c0 and c1 match above conditions.
12898 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
12899 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12900 if (!N0C || !N1C)
12901 return SDValue();
12902 // If N0C has multiple uses it's possible one of the cases in
12903 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
12904 // in an infinite loop.
12905 if (!N0C->hasOneUse())
12906 return SDValue();
12907 int64_t C0 = N0C->getSExtValue();
12908 int64_t C1 = N1C->getSExtValue();
12909 int64_t CA, CB;
12910 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
12911 return SDValue();
12912 // Search for proper CA (non-zero) and CB that both are simm12.
12913 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
12914 !isInt<12>(C0 * (C1 / C0))) {
12915 CA = C1 / C0;
12916 CB = C1 % C0;
12917 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
12918 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
12919 CA = C1 / C0 + 1;
12920 CB = C1 % C0 - C0;
12921 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
12922 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
12923 CA = C1 / C0 - 1;
12924 CB = C1 % C0 + C0;
12925 } else
12926 return SDValue();
12927 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
12928 SDLoc DL(N);
12929 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
12930 DAG.getConstant(CA, DL, VT));
12931 SDValue New1 =
12932 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
12933 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
12934}
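// Editor's note: a standalone constexpr check (illustrative only, not part of
// this file) of the arithmetic above. Because c1 == (c1/c0)*c0 + c1%c0 for any
// non-zero c0 under truncating division, rewriting the constants never changes
// the value; the combine only fires when CA and CB are encodable as simm12.
constexpr bool checkAddMulImm(long long X, long long C0, long long C1) {
  long long CA = C1 / C0, CB = C1 % C0;
  return X * C0 + C1 == (X + CA) * C0 + CB;
}
// C0 = 100, C1 = 204700: CA = 2047 and CB = 0 both fit in simm12, while C1
// itself does not.
static_assert(checkAddMulImm(37, 100, 204700), "");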
12935
12936// add (zext, zext) -> zext (add (zext, zext))
12937// sub (zext, zext) -> sext (sub (zext, zext))
12938// mul (zext, zext) -> zext (mul (zext, zext))
12939//
12940 // where the sum of the extend widths match, and the range of the bin op
12941// fits inside the width of the narrower bin op. (For profitability on rvv, we
12942// use a power of two for both inner and outer extend.)
12943//
12944// TODO: Extend this to other binary ops
12945 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
12946
12947 EVT VT = N->getValueType(0);
12948 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
12949 return SDValue();
12950
12951 SDValue N0 = N->getOperand(0);
12952 SDValue N1 = N->getOperand(1);
12953 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
12954 return SDValue();
12955 if (!N0.hasOneUse() || !N1.hasOneUse())
12956 return SDValue();
12957
12958 SDValue Src0 = N0.getOperand(0);
12959 SDValue Src1 = N1.getOperand(0);
12960 EVT SrcVT = Src0.getValueType();
12961 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
12962 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
12963 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
12964 return SDValue();
12965
12966 LLVMContext &C = *DAG.getContext();
12967 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
12968 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
12969
12970 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
12971 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
12972
12973 // Src0 and Src1 are zero extended, so they're always positive if signed.
12974 //
12975 // sub can produce a negative from two positive operands, so it needs sign
12976 // extended. Other nodes produce a positive from two positive operands, so
12977 // zero extend instead.
12978 unsigned OuterExtend =
12979 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12980
12981 return DAG.getNode(
12982 OuterExtend, SDLoc(N), VT,
12983 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
12984}
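// Editor's note: a standalone constexpr check (illustrative only, not part of
// this file) of why the narrower operation is safe here: two u8 values
// zero-extended and added can never overflow 16 bits, so the i16 add followed
// by an outer zero-extend matches the i32 add of the zero-extended inputs.
constexpr bool checkNarrowZExtAdd(unsigned char A, unsigned char B) {
  unsigned Wide = static_cast<unsigned>(A) + static_cast<unsigned>(B);
  unsigned short Narrow = static_cast<unsigned short>(
      static_cast<unsigned short>(A) + static_cast<unsigned short>(B));
  return Wide == static_cast<unsigned>(Narrow);
}
static_assert(checkNarrowZExtAdd(200, 255) && checkNarrowZExtAdd(0, 1), "");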
12985
12986 // Try to turn (add (xor bool, 1), -1) into (neg bool).
12987 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
12988 SDValue N0 = N->getOperand(0);
12989 SDValue N1 = N->getOperand(1);
12990 EVT VT = N->getValueType(0);
12991 SDLoc DL(N);
12992
12993 // RHS should be -1.
12994 if (!isAllOnesConstant(N1))
12995 return SDValue();
12996
12997 // Look for (xor X, 1).
12998 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
12999 return SDValue();
13000
13001 // First xor input should be 0 or 1.
13002 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13003 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
13004 return SDValue();
13005
13006 // Emit a negate of the setcc.
13007 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
13008 N0.getOperand(0));
13009}
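// Editor's note: a standalone constexpr check (illustrative only, not part of
// this file) of the bit trick above for a 0/1 input: (b ^ 1) - 1 == -b.
constexpr bool checkNegOfBool(int B) { return ((B ^ 1) + (-1)) == -B; }
static_assert(checkNegOfBool(0) && checkNegOfBool(1), "");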
13010
13011 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
13012 const RISCVSubtarget &Subtarget) {
13013 if (SDValue V = combineAddOfBooleanXor(N, DAG))
13014 return V;
13015 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
13016 return V;
13017 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
13018 return V;
13019 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13020 return V;
13021 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13022 return V;
13023 if (SDValue V = combineBinOpOfZExt(N, DAG))
13024 return V;
13025
13026 // fold (add (select lhs, rhs, cc, 0, y), x) ->
13027 // (select lhs, rhs, cc, x, (add x, y))
13028 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13029}
13030
13031// Try to turn a sub boolean RHS and constant LHS into an addi.
13033 SDValue N0 = N->getOperand(0);
13034 SDValue N1 = N->getOperand(1);
13035 EVT VT = N->getValueType(0);
13036 SDLoc DL(N);
13037
13038 // Require a constant LHS.
13039 auto *N0C = dyn_cast<ConstantSDNode>(N0);
13040 if (!N0C)
13041 return SDValue();
13042
13043 // All our optimizations involve subtracting 1 from the immediate and forming
13044 // an ADDI. Make sure the new immediate is valid for an ADDI.
13045 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
13046 if (!ImmValMinus1.isSignedIntN(12))
13047 return SDValue();
13048
13049 SDValue NewLHS;
13050 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
13051 // (sub constant, (setcc x, y, eq/neq)) ->
13052 // (add (setcc x, y, neq/eq), constant - 1)
13053 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13054 EVT SetCCOpVT = N1.getOperand(0).getValueType();
13055 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
13056 return SDValue();
13057 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
13058 NewLHS =
13059 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
13060 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
13061 N1.getOperand(0).getOpcode() == ISD::SETCC) {
13062 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
13063 // Since setcc returns a bool the xor is equivalent to 1-setcc.
13064 NewLHS = N1.getOperand(0);
13065 } else
13066 return SDValue();
13067
13068 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
13069 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
13070}
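// Editor's note: a standalone constexpr check (illustrative only, not part of
// this file) of the rewrite above: subtracting a 0/1 setcc from a constant is
// the inverted setcc plus (constant - 1).
constexpr bool checkSubOfBoolean(int C, int X, int Y) {
  int Eq = (X == Y) ? 1 : 0;
  int Ne = (X != Y) ? 1 : 0;
  return C - Eq == Ne + (C - 1);
}
static_assert(checkSubOfBoolean(5, 3, 3) && checkSubOfBoolean(5, 3, 4), "");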
13071
13072 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
13073 const RISCVSubtarget &Subtarget) {
13074 if (SDValue V = combineSubOfBoolean(N, DAG))
13075 return V;
13076
13077 EVT VT = N->getValueType(0);
13078 SDValue N0 = N->getOperand(0);
13079 SDValue N1 = N->getOperand(1);
13080 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
13081 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
13082 isNullConstant(N1.getOperand(1))) {
13083 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
13084 if (CCVal == ISD::SETLT) {
13085 SDLoc DL(N);
13086 unsigned ShAmt = N0.getValueSizeInBits() - 1;
13087 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
13088 DAG.getConstant(ShAmt, DL, VT));
13089 }
13090 }
13091
13092 if (SDValue V = combineBinOpOfZExt(N, DAG))
13093 return V;
13094
13095 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
13096 // (select lhs, rhs, cc, x, (sub x, y))
13097 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
13098}
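// Editor's note: a standalone constexpr check (illustrative only, not part of
// this file) of the (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1) fold for
// a 32-bit value; it assumes arithmetic right shift of signed int, which is
// guaranteed from C++20 and matches SRA semantics.
constexpr bool checkNegOfSetLT(int X) {
  return (0 - (X < 0 ? 1 : 0)) == (X >> 31);
}
static_assert(checkNegOfSetLT(-7) && checkNegOfSetLT(42) && checkNegOfSetLT(0), "");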
13099
13100// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
13101// Legalizing setcc can introduce xors like this. Doing this transform reduces
13102// the number of xors and may allow the xor to fold into a branch condition.
13103 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
13104 SDValue N0 = N->getOperand(0);
13105 SDValue N1 = N->getOperand(1);
13106 bool IsAnd = N->getOpcode() == ISD::AND;
13107
13108 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
13109 return SDValue();
13110
13111 if (!N0.hasOneUse() || !N1.hasOneUse())
13112 return SDValue();
13113
13114 SDValue N01 = N0.getOperand(1);
13115 SDValue N11 = N1.getOperand(1);
13116
13117 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
13118 // (xor X, -1) based on the upper bits of the other operand being 0. If the
13119 // operation is And, allow one of the Xors to use -1.
13120 if (isOneConstant(N01)) {
13121 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
13122 return SDValue();
13123 } else if (isOneConstant(N11)) {
13124 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
13125 if (!(IsAnd && isAllOnesConstant(N01)))
13126 return SDValue();
13127 } else
13128 return SDValue();
13129
13130 EVT VT = N->getValueType(0);
13131
13132 SDValue N00 = N0.getOperand(0);
13133 SDValue N10 = N1.getOperand(0);
13134
13135 // The LHS of the xors needs to be 0/1.
13136 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
13137 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
13138 return SDValue();
13139
13140 // Invert the opcode and insert a new xor.
13141 SDLoc DL(N);
13142 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
13143 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
13144 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
13145}
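// Editor's note: a standalone constexpr check (illustrative only, not part of
// this file) of De Morgan's law restricted to 0/1 inputs, which is exactly the
// form used above: one inverted logic op replaces two xors.
constexpr bool checkDeMorganBool(int X, int Y) { // X, Y in {0, 1}
  bool AndForm = ((X ^ 1) & (Y ^ 1)) == ((X | Y) ^ 1);
  bool OrForm = ((X ^ 1) | (Y ^ 1)) == ((X & Y) ^ 1);
  return AndForm && OrForm;
}
static_assert(checkDeMorganBool(0, 0) && checkDeMorganBool(0, 1) &&
              checkDeMorganBool(1, 0) && checkDeMorganBool(1, 1), "");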
13146
13147 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
13148 const RISCVSubtarget &Subtarget) {
13149 SDValue N0 = N->getOperand(0);
13150 EVT VT = N->getValueType(0);
13151
13152 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
13153 // extending X. This is safe since we only need the LSB after the shift and
13154 // shift amounts larger than 31 would produce poison. If we wait until
13155 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13156 // to use a BEXT instruction.
13157 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
13158 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
13159 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13160 SDLoc DL(N0);
13161 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13162 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13163 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13164 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
13165 }
13166
13167 return SDValue();
13168}
13169
13170 // Combines two comparison operations and a logic operation into one selection
13171 // operation (min, max) and logic operation. Returns the newly constructed node
13172 // if the conditions for the optimization are satisfied.
13173 static SDValue performANDCombine(SDNode *N,
13174 TargetLowering::DAGCombinerInfo &DCI,
13175 const RISCVSubtarget &Subtarget) {
13176 SelectionDAG &DAG = DCI.DAG;
13177
13178 SDValue N0 = N->getOperand(0);
13179 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
13180 // extending X. This is safe since we only need the LSB after the shift and
13181 // shift amounts larger than 31 would produce poison. If we wait until
13182 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
13183 // to use a BEXT instruction.
13184 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13185 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
13186 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
13187 N0.hasOneUse()) {
13188 SDLoc DL(N);
13189 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13190 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13191 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
13192 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
13193 DAG.getConstant(1, DL, MVT::i64));
13194 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13195 }
13196
13197 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13198 return V;
13199 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13200 return V;
13201
13202 if (DCI.isAfterLegalizeDAG())
13203 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13204 return V;
13205
13206 // fold (and (select lhs, rhs, cc, -1, y), x) ->
13207 // (select lhs, rhs, cc, x, (and x, y))
13208 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
13209}
13210
13211// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
13212// FIXME: Generalize to other binary operators with same operand.
13213 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
13214 SelectionDAG &DAG) {
13215 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
13216
13217 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
13218 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
13219 !N0.hasOneUse() || !N1.hasOneUse())
13220 return SDValue();
13221
13222 // Should have the same condition.
13223 SDValue Cond = N0.getOperand(1);
13224 if (Cond != N1.getOperand(1))
13225 return SDValue();
13226
13227 SDValue TrueV = N0.getOperand(0);
13228 SDValue FalseV = N1.getOperand(0);
13229
13230 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
13231 TrueV.getOperand(1) != FalseV.getOperand(1) ||
13232 !isOneConstant(TrueV.getOperand(1)) ||
13233 !TrueV.hasOneUse() || !FalseV.hasOneUse())
13234 return SDValue();
13235
13236 EVT VT = N->getValueType(0);
13237 SDLoc DL(N);
13238
13239 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
13240 Cond);
13241 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
13242 Cond);
13243 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
13244 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
13245}
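// Editor's note: a standalone constexpr sketch (illustrative only, not part of
// this file; the czero helpers below model the Zicond semantics) of the pull
// performed above -- the common "xor with 1" is hoisted past the czero pair.
constexpr unsigned czeroEqz(unsigned X, unsigned C) { return C != 0 ? X : 0; }
constexpr unsigned czeroNez(unsigned X, unsigned C) { return C == 0 ? X : 0; }
constexpr bool checkPullXorThroughCZero(unsigned A, unsigned B, unsigned C) {
  unsigned Before = czeroEqz(A ^ 1, C) | czeroNez(B ^ 1, C);
  unsigned After = (czeroEqz(A, C) | czeroNez(B, C)) ^ 1;
  return Before == After;
}
static_assert(checkPullXorThroughCZero(6, 9, 0) &&
              checkPullXorThroughCZero(6, 9, 1), "");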
13246
13247 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13248 const RISCVSubtarget &Subtarget) {
13249 SelectionDAG &DAG = DCI.DAG;
13250
13251 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13252 return V;
13253 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13254 return V;
13255
13256 if (DCI.isAfterLegalizeDAG())
13257 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
13258 return V;
13259
13260 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
13261 // We may be able to pull a common operation out of the true and false value.
13262 SDValue N0 = N->getOperand(0);
13263 SDValue N1 = N->getOperand(1);
13264 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
13265 return V;
13266 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
13267 return V;
13268
13269 // fold (or (select cond, 0, y), x) ->
13270 // (select cond, x, (or x, y))
13271 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13272}
13273
13274 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
13275 const RISCVSubtarget &Subtarget) {
13276 SDValue N0 = N->getOperand(0);
13277 SDValue N1 = N->getOperand(1);
13278
13279 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
13280 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
13281 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
13282 if (!RV64LegalI32 && Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
13283 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
13284 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
13285 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
13286 SDLoc DL(N);
13287 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
13288 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
13289 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
13290 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
13291 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
13292 }
13293
13294 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
13295 // NOTE: Assumes ROL being legal means ROLW is legal.
13296 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13297 if (N0.getOpcode() == RISCVISD::SLLW &&
13299 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
13300 SDLoc DL(N);
13301 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
13302 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
13303 }
13304
13305 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
13306 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
13307 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
13308 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13309 if (ConstN00 && CC == ISD::SETLT) {
13310 EVT VT = N0.getValueType();
13311 SDLoc DL(N0);
13312 const APInt &Imm = ConstN00->getAPIntValue();
13313 if ((Imm + 1).isSignedIntN(12))
13314 return DAG.getSetCC(DL, VT, N0.getOperand(1),
13315 DAG.getConstant(Imm + 1, DL, VT), CC);
13316 }
13317 }
13318
13319 // Combine (xor (trunc (X cc Y)) 1) -> (trunc (X !cc Y)). This is needed with
13320 // RV64LegalI32 when the setcc is created after type legalization. An i1 xor
13321 // would have been promoted to i32, but the setcc would have i64 result.
13322 if (N->getValueType(0) == MVT::i32 && N0.getOpcode() == ISD::TRUNCATE &&
13323 isOneConstant(N1) && N0.getOperand(0).getOpcode() == ISD::SETCC) {
13324 SDValue N00 = N0.getOperand(0);
13325 SDLoc DL(N);
13326 SDValue LHS = N00.getOperand(0);
13327 SDValue RHS = N00.getOperand(1);
13328 SDValue CC = N00.getOperand(2);
13329 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
13330 LHS.getValueType());
13331 SDValue Setcc = DAG.getSetCC(SDLoc(N00), N0.getOperand(0).getValueType(),
13332 LHS, RHS, NotCC);
13333 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N->getValueType(0), Setcc);
13334 }
13335
13336 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
13337 return V;
13338 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
13339 return V;
13340
13341 // fold (xor (select cond, 0, y), x) ->
13342 // (select cond, x, (xor x, y))
13343 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
13344}
13345
13347 EVT VT = N->getValueType(0);
13348 if (!VT.isVector())
13349 return SDValue();
13350
13351 SDLoc DL(N);
13352 SDValue N0 = N->getOperand(0);
13353 SDValue N1 = N->getOperand(1);
13354 SDValue MulOper;
13355 unsigned AddSubOpc;
13356
13357 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
13358 // (mul x, add (y, 1)) -> (add x, (mul x, y))
13359 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
13360 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
13361 auto IsAddSubWith1 = [&](SDValue V) -> bool {
13362 AddSubOpc = V->getOpcode();
13363 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
13364 SDValue Opnd = V->getOperand(1);
13365 MulOper = V->getOperand(0);
13366 if (AddSubOpc == ISD::SUB)
13367 std::swap(Opnd, MulOper);
13368 if (isOneOrOneSplat(Opnd))
13369 return true;
13370 }
13371 return false;
13372 };
13373
13374 if (IsAddSubWith1(N0)) {
13375 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
13376 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
13377 }
13378
13379 if (IsAddSubWith1(N1)) {
13380 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
13381 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
13382 }
13383
13384 if (SDValue V = combineBinOpOfZExt(N, DAG))
13385 return V;
13386
13387 return SDValue();
13388}
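// Editor's note: a standalone constexpr check (illustrative only, not part of
// this file) of the scalar identities behind the vmadd/vnmsub rewrites above.
constexpr bool checkMulOfAddSubOne(int X, int Y) {
  bool MAdd = (X + 1) * Y == X * Y + Y;   // feeds the vmadd form
  bool NMSub = (1 - X) * Y == Y - X * Y;  // feeds the vnmsub form
  return MAdd && NMSub;
}
static_assert(checkMulOfAddSubOne(7, -3) && checkMulOfAddSubOne(-5, 11), "");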
13389
13390 /// Indexed load/store instructions zero-extend their indices, so try to
13391 /// narrow the type of the index operand.
13392static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
13393 if (isIndexTypeSigned(IndexType))
13394 return false;
13395
13396 if (!N->hasOneUse())
13397 return false;
13398
13399 EVT VT = N.getValueType();
13400 SDLoc DL(N);
13401
13402 // In general, what we're doing here is seeing if we can sink a truncate to
13403 // a smaller element type into the expression tree building our index.
13404 // TODO: We can generalize this and handle a bunch more cases if useful.
13405
13406 // Narrow a buildvector to the narrowest element type. This requires less
13407 // work and less register pressure at high LMUL, and creates smaller constants
13408 // which may be cheaper to materialize.
13409 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
13410 KnownBits Known = DAG.computeKnownBits(N);
13411 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
13412 LLVMContext &C = *DAG.getContext();
13413 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
13414 if (ResultVT.bitsLT(VT.getVectorElementType())) {
13415 N = DAG.getNode(ISD::TRUNCATE, DL,
13416 VT.changeVectorElementType(ResultVT), N);
13417 return true;
13418 }
13419 }
13420
13421 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
13422 if (N.getOpcode() != ISD::SHL)
13423 return false;
13424
13425 SDValue N0 = N.getOperand(0);
13426 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
13427 N0.getOpcode() != RISCVISD::VZEXT_VL)
13428 return false;
13429 if (!N0->hasOneUse())
13430 return false;
13431
13432 APInt ShAmt;
13433 SDValue N1 = N.getOperand(1);
13434 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
13435 return false;
13436
13437 SDValue Src = N0.getOperand(0);
13438 EVT SrcVT = Src.getValueType();
13439 unsigned SrcElen = SrcVT.getScalarSizeInBits();
13440 unsigned ShAmtV = ShAmt.getZExtValue();
13441 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
13442 NewElen = std::max(NewElen, 8U);
13443
13444 // Skip if NewElen is not narrower than the original extended type.
13445 if (NewElen >= N0.getValueType().getScalarSizeInBits())
13446 return false;
13447
13448 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
13449 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
13450
13451 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
13452 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
13453 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
13454 return true;
13455}
13456
13457// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
13458// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
13459// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
13460// can become a sext.w instead of a shift pair.
13462 const RISCVSubtarget &Subtarget) {
13463 SDValue N0 = N->getOperand(0);
13464 SDValue N1 = N->getOperand(1);
13465 EVT VT = N->getValueType(0);
13466 EVT OpVT = N0.getValueType();
13467
13468 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
13469 return SDValue();
13470
13471 // RHS needs to be a constant.
13472 auto *N1C = dyn_cast<ConstantSDNode>(N1);
13473 if (!N1C)
13474 return SDValue();
13475
13476 // LHS needs to be (and X, 0xffffffff).
13477 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
13478 !isa<ConstantSDNode>(N0.getOperand(1)) ||
13479 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
13480 return SDValue();
13481
13482 // Looking for an equality compare.
13483 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
13484 if (!isIntEqualitySetCC(Cond))
13485 return SDValue();
13486
13487 // Don't do this if the sign bit is provably zero, it will be turned back into
13488 // an AND.
13489 APInt SignMask = APInt::getOneBitSet(64, 31);
13490 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
13491 return SDValue();
13492
13493 const APInt &C1 = N1C->getAPIntValue();
13494
13495 SDLoc dl(N);
13496 // If the constant is larger than 2^32 - 1 it is impossible for both sides
13497 // to be equal.
13498 if (C1.getActiveBits() > 32)
13499 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
13500
13501 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
13502 N0.getOperand(0), DAG.getValueType(MVT::i32));
13503 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
13504 dl, OpVT), Cond);
13505}
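// Editor's note: a standalone constexpr check (illustrative only, not part of
// this file) of the equivalence above for a 64-bit value: comparing the
// zero-extended low 32 bits against C1 gives the same answer as comparing the
// sign-extended low 32 bits against C1 sign-extended from bit 31, whenever C1
// has at most 32 active bits. The narrowing casts assume two's-complement
// wrap-around, which is guaranteed from C++20.
constexpr bool checkMaskedEqRewrite(long long X, unsigned C1) {
  bool Masked = (X & 0xffffffffLL) == static_cast<long long>(C1);
  long long SExtX = static_cast<int>(static_cast<unsigned>(X)); // sext_inreg X, i32
  long long SExtC1 = static_cast<int>(C1);                      // C1 sext from bit 31
  return Masked == (SExtX == SExtC1);
}
static_assert(checkMaskedEqRewrite(-2147483647LL, 0x80000001u), "");
static_assert(checkMaskedEqRewrite(0x123456789LL, 0x80000001u), "");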
13506
13507static SDValue
13509 const RISCVSubtarget &Subtarget) {
13510 SDValue Src = N->getOperand(0);
13511 EVT VT = N->getValueType(0);
13512
13513 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
13514 if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
13515 cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
13516 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
13517 Src.getOperand(0));
13518
13519 return SDValue();
13520}
13521
13522namespace {
13523// Forward declaration of the structure holding the necessary information to
13524// apply a combine.
13525struct CombineResult;
13526
13527enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
13528/// Helper class for folding sign/zero extensions.
13529/// In particular, this class is used for the following combines:
13530/// add | add_vl -> vwadd(u) | vwadd(u)_w
13531/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
13532/// mul | mul_vl -> vwmul(u) | vwmul_su
13533/// fadd -> vfwadd | vfwadd_w
13534/// fsub -> vfwsub | vfwsub_w
13535/// fmul -> vfwmul
13536/// An object of this class represents an operand of the operation we want to
13537/// combine.
13538/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
13539/// NodeExtensionHelper for `a` and one for `b`.
13540///
13541/// This class abstracts away how the extension is materialized and
13542 /// how its Mask, VL, and number of users affect the combines.
13543///
13544/// In particular:
13545/// - VWADD_W is conceptually == add(op0, sext(op1))
13546/// - VWADDU_W == add(op0, zext(op1))
13547/// - VWSUB_W == sub(op0, sext(op1))
13548/// - VWSUBU_W == sub(op0, zext(op1))
13549/// - VFWADD_W == fadd(op0, fpext(op1))
13550/// - VFWSUB_W == fsub(op0, fpext(op1))
13551/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
13552/// zext|sext(smaller_value).
13553struct NodeExtensionHelper {
13554 /// Records if this operand is like being zero extended.
13555 bool SupportsZExt;
13556 /// Records if this operand is like being sign extended.
13557 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
13558 /// instance, a splat constant (e.g., 3), would support being both sign and
13559 /// zero extended.
13560 bool SupportsSExt;
13561 /// Records if this operand is like being floating-Point extended.
13562 bool SupportsFPExt;
13563 /// This boolean captures whether we care if this operand would still be
13564 /// around after the folding happens.
13565 bool EnforceOneUse;
13566 /// Records if this operand's mask needs to match the mask of the operation
13567 /// that it will fold into.
13568 bool CheckMask;
13569 /// Value of the Mask for this operand.
13570 /// It may be SDValue().
13571 SDValue Mask;
13572 /// Value of the vector length operand.
13573 /// It may be SDValue().
13574 SDValue VL;
13575 /// Original value that this NodeExtensionHelper represents.
13576 SDValue OrigOperand;
13577
13578 /// Get the value feeding the extension or the value itself.
13579 /// E.g., for zext(a), this would return a.
13580 SDValue getSource() const {
13581 switch (OrigOperand.getOpcode()) {
13582 case ISD::ZERO_EXTEND:
13583 case ISD::SIGN_EXTEND:
13584 case RISCVISD::VSEXT_VL:
13585 case RISCVISD::VZEXT_VL:
13586 case RISCVISD::FP_EXTEND_VL:
13587 return OrigOperand.getOperand(0);
13588 default:
13589 return OrigOperand;
13590 }
13591 }
13592
13593 /// Check if this instance represents a splat.
13594 bool isSplat() const {
13595 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
13596 }
13597
13598 /// Get the extended opcode.
13599 unsigned getExtOpc(ExtKind SupportsExt) const {
13600 switch (SupportsExt) {
13601 case ExtKind::SExt:
13602 return RISCVISD::VSEXT_VL;
13603 case ExtKind::ZExt:
13604 return RISCVISD::VZEXT_VL;
13605 case ExtKind::FPExt:
13606 return RISCVISD::FP_EXTEND_VL;
13607 }
13608 llvm_unreachable("Unknown ExtKind enum");
13609 }
13610
13611 /// Get or create a value that can feed \p Root with the given extension \p
13612 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
13613 /// operand. \see ::getSource().
13614 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
13615 const RISCVSubtarget &Subtarget,
13616 std::optional<ExtKind> SupportsExt) const {
13617 if (!SupportsExt.has_value())
13618 return OrigOperand;
13619
13620 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
13621
13622 SDValue Source = getSource();
13623 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
13624 if (Source.getValueType() == NarrowVT)
13625 return Source;
13626
13627 unsigned ExtOpc = getExtOpc(*SupportsExt);
13628
13629 // If we need an extension, we should be changing the type.
13630 SDLoc DL(OrigOperand);
13631 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13632 switch (OrigOperand.getOpcode()) {
13633 case ISD::ZERO_EXTEND:
13634 case ISD::SIGN_EXTEND:
13635 case RISCVISD::VSEXT_VL:
13636 case RISCVISD::VZEXT_VL:
13637 case RISCVISD::FP_EXTEND_VL:
13638 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
13639 case RISCVISD::VMV_V_X_VL:
13640 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
13641 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
13642 default:
13643 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
13644 // and that operand should already have the right NarrowVT so no
13645 // extension should be required at this point.
13646 llvm_unreachable("Unsupported opcode");
13647 }
13648 }
13649
13650 /// Helper function to get the narrow type for \p Root.
13651 /// The narrow type is the type of \p Root where we divided the size of each
13652 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
13653 /// \pre Both the narrow type and the original type should be legal.
13654 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
13655 MVT VT = Root->getSimpleValueType(0);
13656
13657 // Determine the narrow size.
13658 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13659
13660 MVT EltVT = SupportsExt == ExtKind::FPExt
13661 ? MVT::getFloatingPointVT(NarrowSize)
13662 : MVT::getIntegerVT(NarrowSize);
13663
13664 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
13665 "Trying to extend something we can't represent");
13666 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
13667 return NarrowVT;
13668 }
13669
13670 /// Get the opcode to materialize:
13671 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
13672 static unsigned getSExtOpcode(unsigned Opcode) {
13673 switch (Opcode) {
13674 case ISD::ADD:
13675 case RISCVISD::ADD_VL:
13678 return RISCVISD::VWADD_VL;
13679 case ISD::SUB:
13680 case RISCVISD::SUB_VL:
13683 return RISCVISD::VWSUB_VL;
13684 case ISD::MUL:
13685 case RISCVISD::MUL_VL:
13686 return RISCVISD::VWMUL_VL;
13687 default:
13688 llvm_unreachable("Unexpected opcode");
13689 }
13690 }
13691
13692 /// Get the opcode to materialize:
13693 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
13694 static unsigned getZExtOpcode(unsigned Opcode) {
13695 switch (Opcode) {
13696 case ISD::ADD:
13697 case RISCVISD::ADD_VL:
13700 return RISCVISD::VWADDU_VL;
13701 case ISD::SUB:
13702 case RISCVISD::SUB_VL:
13705 return RISCVISD::VWSUBU_VL;
13706 case ISD::MUL:
13707 case RISCVISD::MUL_VL:
13708 return RISCVISD::VWMULU_VL;
13709 default:
13710 llvm_unreachable("Unexpected opcode");
13711 }
13712 }
13713
13714 /// Get the opcode to materialize:
13715 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
13716 static unsigned getFPExtOpcode(unsigned Opcode) {
13717 switch (Opcode) {
13718 case RISCVISD::FADD_VL:
13720 return RISCVISD::VFWADD_VL;
13721 case RISCVISD::FSUB_VL:
13723 return RISCVISD::VFWSUB_VL;
13724 case RISCVISD::FMUL_VL:
13725 return RISCVISD::VFWMUL_VL;
13726 default:
13727 llvm_unreachable("Unexpected opcode");
13728 }
13729 }
13730
13731 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
13732 /// newOpcode(a, b).
13733 static unsigned getSUOpcode(unsigned Opcode) {
13734 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
13735 "SU is only supported for MUL");
13736 return RISCVISD::VWMULSU_VL;
13737 }
13738
13739 /// Get the opcode to materialize
13740 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
13741 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
13742 switch (Opcode) {
13743 case ISD::ADD:
13744 case RISCVISD::ADD_VL:
13745 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
13747 case ISD::SUB:
13748 case RISCVISD::SUB_VL:
13749 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
13751 case RISCVISD::FADD_VL:
13752 return RISCVISD::VFWADD_W_VL;
13753 case RISCVISD::FSUB_VL:
13754 return RISCVISD::VFWSUB_W_VL;
13755 default:
13756 llvm_unreachable("Unexpected opcode");
13757 }
13758 }
13759
13760 using CombineToTry = std::function<std::optional<CombineResult>(
13761 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
13762 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
13763 const RISCVSubtarget &)>;
13764
13765 /// Check if this node needs to be fully folded or extended for all users.
13766 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
13767
13768 /// Helper method to set the various fields of this struct based on the
13769 /// type of \p Root.
13770 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
13771 const RISCVSubtarget &Subtarget) {
13772 SupportsZExt = false;
13773 SupportsSExt = false;
13774 SupportsFPExt = false;
13775 EnforceOneUse = true;
13776 CheckMask = true;
13777 unsigned Opc = OrigOperand.getOpcode();
13778 switch (Opc) {
13779 case ISD::ZERO_EXTEND:
13780 case ISD::SIGN_EXTEND: {
13781 MVT VT = OrigOperand.getSimpleValueType();
13782 if (!VT.isVector())
13783 break;
13784
13785 SDValue NarrowElt = OrigOperand.getOperand(0);
13786 MVT NarrowVT = NarrowElt.getSimpleValueType();
13787 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
13788 if (NarrowVT.getVectorElementType() == MVT::i1)
13789 break;
13790
13791 SupportsZExt = Opc == ISD::ZERO_EXTEND;
13792 SupportsSExt = Opc == ISD::SIGN_EXTEND;
13793
13794 SDLoc DL(Root);
13795 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
13796 break;
13797 }
13798 case RISCVISD::VZEXT_VL:
13799 SupportsZExt = true;
13800 Mask = OrigOperand.getOperand(1);
13801 VL = OrigOperand.getOperand(2);
13802 break;
13803 case RISCVISD::VSEXT_VL:
13804 SupportsSExt = true;
13805 Mask = OrigOperand.getOperand(1);
13806 VL = OrigOperand.getOperand(2);
13807 break;
13808 case RISCVISD::FP_EXTEND_VL:
13809 SupportsFPExt = true;
13810 Mask = OrigOperand.getOperand(1);
13811 VL = OrigOperand.getOperand(2);
13812 break;
13813 case RISCVISD::VMV_V_X_VL: {
13814 // Historically, we didn't care about splat values not disappearing during
13815 // combines.
13816 EnforceOneUse = false;
13817 CheckMask = false;
13818 VL = OrigOperand.getOperand(2);
13819
13820 // The operand is a splat of a scalar.
13821
13822 // The passthru must be undef for tail agnostic.
13823 if (!OrigOperand.getOperand(0).isUndef())
13824 break;
13825
13826 // Get the scalar value.
13827 SDValue Op = OrigOperand.getOperand(1);
13828
13829 // See if we have enough sign bits or zero bits in the scalar to use a
13830 // widening opcode by splatting to smaller element size.
13831 MVT VT = Root->getSimpleValueType(0);
13832 unsigned EltBits = VT.getScalarSizeInBits();
13833 unsigned ScalarBits = Op.getValueSizeInBits();
13834 // Make sure we're getting all element bits from the scalar register.
13835 // FIXME: Support implicit sign extension of vmv.v.x?
13836 if (ScalarBits < EltBits)
13837 break;
13838
13839 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
13840 // If the narrow type cannot be expressed with a legal VMV,
13841 // this is not a valid candidate.
13842 if (NarrowSize < 8)
13843 break;
13844
13845 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
13846 SupportsSExt = true;
13847 if (DAG.MaskedValueIsZero(Op,
13848 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
13849 SupportsZExt = true;
13850 break;
13851 }
13852 default:
13853 break;
13854 }
13855 }
13856
13857 /// Check if \p Root supports any extension folding combines.
13858 static bool isSupportedRoot(const SDNode *Root) {
13859 switch (Root->getOpcode()) {
13860 case ISD::ADD:
13861 case ISD::SUB:
13862 case ISD::MUL: {
13863 return Root->getValueType(0).isScalableVector();
13864 }
13865 // Vector Widening Integer Add/Sub/Mul Instructions
13866 case RISCVISD::ADD_VL:
13867 case RISCVISD::MUL_VL:
13870 case RISCVISD::SUB_VL:
13873 // Vector Widening Floating-Point Add/Sub/Mul Instructions
13874 case RISCVISD::FADD_VL:
13875 case RISCVISD::FSUB_VL:
13876 case RISCVISD::FMUL_VL:
13879 return true;
13880 default:
13881 return false;
13882 }
13883 }
13884
13885 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
13886 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
13887 const RISCVSubtarget &Subtarget) {
13888 assert(isSupportedRoot(Root) && "Trying to build a helper with an "
13889 "unsupported root");
13890 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
13892 OrigOperand = Root->getOperand(OperandIdx);
13893
13894 unsigned Opc = Root->getOpcode();
13895 switch (Opc) {
13896 // We consider
13897 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
13898 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
13899 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
13906 if (OperandIdx == 1) {
13907 SupportsZExt =
13909 SupportsSExt =
13911 SupportsFPExt =
13913 std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget);
13914 CheckMask = true;
13915 // There's no existing extension here, so we don't have to worry about
13916 // making sure it gets removed.
13917 EnforceOneUse = false;
13918 break;
13919 }
13920 [[fallthrough]];
13921 default:
13922 fillUpExtensionSupport(Root, DAG, Subtarget);
13923 break;
13924 }
13925 }
13926
13927 /// Check if this operand is compatible with the given vector length \p VL.
13928 bool isVLCompatible(SDValue VL) const {
13929 return this->VL != SDValue() && this->VL == VL;
13930 }
13931
13932 /// Check if this operand is compatible with the given \p Mask.
13933 bool isMaskCompatible(SDValue Mask) const {
13934 return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask);
13935 }
13936
13937 /// Helper function to get the Mask and VL from \p Root.
13938 static std::pair<SDValue, SDValue>
13939 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
13940 const RISCVSubtarget &Subtarget) {
13941 assert(isSupportedRoot(Root) && "Unexpected root");
13942 switch (Root->getOpcode()) {
13943 case ISD::ADD:
13944 case ISD::SUB:
13945 case ISD::MUL: {
13946 SDLoc DL(Root);
13947 MVT VT = Root->getSimpleValueType(0);
13948 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
13949 }
13950 default:
13951 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
13952 }
13953 }
13954
13955 /// Check if the Mask and VL of this operand are compatible with \p Root.
13956 bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG,
13957 const RISCVSubtarget &Subtarget) const {
13958 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
13959 return isMaskCompatible(Mask) && isVLCompatible(VL);
13960 }
13961
13962 /// Helper function to check if \p N is commutative with respect to the
13963 /// foldings that are supported by this class.
13964 static bool isCommutative(const SDNode *N) {
13965 switch (N->getOpcode()) {
13966 case ISD::ADD:
13967 case ISD::MUL:
13968 case RISCVISD::ADD_VL:
13969 case RISCVISD::MUL_VL:
13972 case RISCVISD::FADD_VL:
13973 case RISCVISD::FMUL_VL:
13975 return true;
13976 case ISD::SUB:
13977 case RISCVISD::SUB_VL:
13980 case RISCVISD::FSUB_VL:
13982 return false;
13983 default:
13984 llvm_unreachable("Unexpected opcode");
13985 }
13986 }
13987
13988 /// Get a list of combine to try for folding extensions in \p Root.
13989 /// Note that each returned CombineToTry function doesn't actually modify
13990 /// anything. Instead they produce an optional CombineResult that if not None,
13991 /// need to be materialized for the combine to be applied.
13992 /// \see CombineResult::materialize.
13993 /// If the related CombineToTry function returns std::nullopt, that means the
13994 /// combine didn't match.
13995 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
13996};
13997
13998/// Helper structure that holds all the necessary information to materialize a
13999/// combine that does some extension folding.
14000struct CombineResult {
14001 /// Opcode to be generated when materializing the combine.
14002 unsigned TargetOpcode;
14003 // No value means no extension is needed.
14004 std::optional<ExtKind> LHSExt;
14005 std::optional<ExtKind> RHSExt;
14006 /// Root of the combine.
14007 SDNode *Root;
14008 /// LHS of the TargetOpcode.
14009 NodeExtensionHelper LHS;
14010 /// RHS of the TargetOpcode.
14011 NodeExtensionHelper RHS;
14012
14013 CombineResult(unsigned TargetOpcode, SDNode *Root,
14014 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
14015 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
14016 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
14017 LHS(LHS), RHS(RHS) {}
14018
14019 /// Return a value that uses TargetOpcode and that can be used to replace
14020 /// Root.
14021 /// The actual replacement is *not* done in that method.
14022 SDValue materialize(SelectionDAG &DAG,
14023 const RISCVSubtarget &Subtarget) const {
14024 SDValue Mask, VL, Merge;
14025 std::tie(Mask, VL) =
14026 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
14027 switch (Root->getOpcode()) {
14028 default:
14029 Merge = Root->getOperand(2);
14030 break;
14031 case ISD::ADD:
14032 case ISD::SUB:
14033 case ISD::MUL:
14034 Merge = DAG.getUNDEF(Root->getValueType(0));
14035 break;
14036 }
14037 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
14038 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
14039 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
14040 Merge, Mask, VL);
14041 }
14042};
14043
14044/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14045/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14046/// are zext) and LHS and RHS can be folded into Root.
14047/// AllowExtMask define which form `ext` can take in this pattern.
14048///
14049/// \note If the pattern can match with both zext and sext, the returned
14050/// CombineResult will feature the zext result.
14051///
14052/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14053/// can be used to apply the pattern.
14054static std::optional<CombineResult>
14055canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
14056 const NodeExtensionHelper &RHS,
14057 uint8_t AllowExtMask, SelectionDAG &DAG,
14058 const RISCVSubtarget &Subtarget) {
14059 if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
14060 !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
14061 return std::nullopt;
14062 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
14063 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
14064 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
14065 /*RHSExt=*/{ExtKind::ZExt});
14066 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
14067 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
14068 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14069 /*RHSExt=*/{ExtKind::SExt});
14070 if ((AllowExtMask & ExtKind::FPExt) && RHS.SupportsFPExt)
14071 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
14072 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
14073 /*RHSExt=*/{ExtKind::FPExt});
14074 return std::nullopt;
14075}
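// Illustrative example (a sketch, not from the original source): for a
// RISCVISD::ADD_VL root whose operands are both sign-extended from a narrower
// vector type, this helper returns a CombineResult carrying the vwadd opcode,
// i.e. roughly  add_vl (sext a), (sext b)  -->  vwadd_vl a, b.  When both zext
// and sext would match, the zext form wins, as the \note above documents.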
14076
14077/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
14078/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
14079/// are zext) and LHS and RHS can be folded into Root.
14080///
14081/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14082/// can be used to apply the pattern.
14083static std::optional<CombineResult>
14084canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
14085 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14086 const RISCVSubtarget &Subtarget) {
14087 return canFoldToVWWithSameExtensionImpl(
14088 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
14089 Subtarget);
14090}
14091
14092/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
14093///
14094/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14095/// can be used to apply the pattern.
14096static std::optional<CombineResult>
14097canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
14098 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14099 const RISCVSubtarget &Subtarget) {
14100 if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
14101 return std::nullopt;
14102
14103 if (RHS.SupportsFPExt)
14104 return CombineResult(
14105 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
14106 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
14107
14108 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
14109 // sext/zext?
14110 // Control this behavior behind an option (AllowSplatInVW_W) for testing
14111 // purposes.
14112 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
14113 return CombineResult(
14114 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
14115 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
14116 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
14117 return CombineResult(
14118 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
14119 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
14120 return std::nullopt;
14121}
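// Illustrative example (a sketch, not from the original source): only the RHS
// is narrowed here, which maps to the ".w" instruction forms, e.g. roughly
//   add_vl wide_y, (zext x)  -->  vwaddu_w_vl wide_y, x
// with the splat-operand case gated behind AllowSplatInVW_W as noted above.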
14122
14123/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
14124///
14125/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14126/// can be used to apply the pattern.
14127static std::optional<CombineResult>
14128canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14129 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14130 const RISCVSubtarget &Subtarget) {
14131 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
14132 Subtarget);
14133}
14134
14135/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
14136///
14137/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14138/// can be used to apply the pattern.
14139static std::optional<CombineResult>
14140canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14141 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14142 const RISCVSubtarget &Subtarget) {
14143 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
14144 Subtarget);
14145}
14146
14147/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
14148///
14149/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14150/// can be used to apply the pattern.
14151static std::optional<CombineResult>
14152canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
14153 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14154 const RISCVSubtarget &Subtarget) {
14155 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
14156 Subtarget);
14157}
14158
14159/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
14160///
14161/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
14162/// can be used to apply the pattern.
14163static std::optional<CombineResult>
14164canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
14165 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
14166 const RISCVSubtarget &Subtarget) {
14167
14168 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
14169 return std::nullopt;
14170 if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
14171 !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
14172 return std::nullopt;
14173 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
14174 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
14175 /*RHSExt=*/{ExtKind::ZExt});
14176}
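// Illustrative example (a sketch, not from the original source): the mixed
// signed/unsigned multiply maps to the "su" form, e.g. roughly
//   mul_vl (sext a), (zext b)  -->  vwmulsu_vl a, b
// Note the operand order is fixed by this check: the sign-extended value ends
// up as the LHS of the new node.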
14177
14178SmallVector<NodeExtensionHelper::CombineToTry>
14179NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
14180 SmallVector<CombineToTry> Strategies;
14181 switch (Root->getOpcode()) {
14182 case ISD::ADD:
14183 case ISD::SUB:
14184 case RISCVISD::ADD_VL:
14185 case RISCVISD::SUB_VL:
14186 case RISCVISD::FADD_VL:
14187 case RISCVISD::FSUB_VL:
14188 // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
14189 Strategies.push_back(canFoldToVWWithSameExtension);
14190 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
14191 Strategies.push_back(canFoldToVW_W);
14192 break;
14193 case RISCVISD::FMUL_VL:
14194 Strategies.push_back(canFoldToVWWithSameExtension);
14195 break;
14196 case ISD::MUL:
14197 case RISCVISD::MUL_VL:
14198 // mul -> vwmul(u)
14199 Strategies.push_back(canFoldToVWWithSameExtension);
14200 // mul -> vwmulsu
14201 Strategies.push_back(canFoldToVW_SU);
14202 break;
14203 case RISCVISD::VWADD_W_VL:
14204 case RISCVISD::VWSUB_W_VL:
14205 // vwadd_w|vwsub_w -> vwadd|vwsub
14206 Strategies.push_back(canFoldToVWWithSEXT);
14207 break;
14208 case RISCVISD::VWADDU_W_VL:
14209 case RISCVISD::VWSUBU_W_VL:
14210 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
14211 Strategies.push_back(canFoldToVWWithZEXT);
14212 break;
14213 case RISCVISD::VFWADD_W_VL:
14214 case RISCVISD::VFWSUB_W_VL:
14215 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
14216 Strategies.push_back(canFoldToVWWithFPEXT);
14217 break;
14218 default:
14219 llvm_unreachable("Unexpected opcode");
14220 }
14221 return Strategies;
14222}
14223} // End anonymous namespace.
14224
14225/// Combine a binary operation to its equivalent VW or VW_W form.
14226/// The supported combines are:
14227/// add_vl -> vwadd(u) | vwadd(u)_w
14228/// sub_vl -> vwsub(u) | vwsub(u)_w
14229/// mul_vl -> vwmul(u) | vwmul_su
14230/// fadd_vl -> vfwadd | vfwadd_w
14231/// fsub_vl -> vfwsub | vfwsub_w
14232/// fmul_vl -> vfwmul
14233/// vwadd_w(u) -> vwadd(u)
14234/// vwsub_w(u) -> vwsub(u)
14235/// vfwadd_w -> vfwadd
14236/// vfwsub_w -> vfwsub
14237static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
14238 TargetLowering::DAGCombinerInfo &DCI,
14239 const RISCVSubtarget &Subtarget) {
14240 SelectionDAG &DAG = DCI.DAG;
14241 if (DCI.isBeforeLegalize())
14242 return SDValue();
14243
14244 if (!NodeExtensionHelper::isSupportedRoot(N))
14245 return SDValue();
14246
14247 SmallVector<SDNode *> Worklist;
14248 SmallSet<SDNode *, 8> Inserted;
14249 Worklist.push_back(N);
14250 Inserted.insert(N);
14251 SmallVector<CombineResult> CombinesToApply;
14252
14253 while (!Worklist.empty()) {
14254 SDNode *Root = Worklist.pop_back_val();
14255 if (!NodeExtensionHelper::isSupportedRoot(Root))
14256 return SDValue();
14257
14258 NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
14259 NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
14260 auto AppendUsersIfNeeded = [&Worklist,
14261 &Inserted](const NodeExtensionHelper &Op) {
14262 if (Op.needToPromoteOtherUsers()) {
14263 for (SDNode *TheUse : Op.OrigOperand->uses()) {
14264 if (Inserted.insert(TheUse).second)
14265 Worklist.push_back(TheUse);
14266 }
14267 }
14268 };
14269
14270 // Control the compile time by limiting the number of nodes we look at in
14271 // total.
14272 if (Inserted.size() > ExtensionMaxWebSize)
14273 return SDValue();
14274
14275 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
14276 NodeExtensionHelper::getSupportedFoldings(N);
14277
14278 assert(!FoldingStrategies.empty() && "Nothing to be folded");
14279 bool Matched = false;
14280 for (int Attempt = 0;
14281 (Attempt != 1 + NodeExtensionHelper::isCommutative(N)) && !Matched;
14282 ++Attempt) {
14283
14284 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
14285 FoldingStrategies) {
14286 std::optional<CombineResult> Res =
14287 FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
14288 if (Res) {
14289 Matched = true;
14290 CombinesToApply.push_back(*Res);
14291 // All the inputs that are extended need to be folded, otherwise
14292 // we would be leaving the old input (since it may still be used),
14293 // and the new one.
14294 if (Res->LHSExt.has_value())
14295 AppendUsersIfNeeded(LHS);
14296 if (Res->RHSExt.has_value())
14297 AppendUsersIfNeeded(RHS);
14298 break;
14299 }
14300 }
14301 std::swap(LHS, RHS);
14302 }
14303 // Right now we do an all or nothing approach.
14304 if (!Matched)
14305 return SDValue();
14306 }
14307 // Store the value for the replacement of the input node separately.
14308 SDValue InputRootReplacement;
14309 // We do the RAUW after we materialize all the combines, because some replaced
14310 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
14311 // some of these nodes may appear in the NodeExtensionHelpers of some of the
14312 // yet-to-be-visited CombinesToApply roots.
14313 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
14314 ValuesToReplace.reserve(CombinesToApply.size());
14315 for (CombineResult Res : CombinesToApply) {
14316 SDValue NewValue = Res.materialize(DAG, Subtarget);
14317 if (!InputRootReplacement) {
14318 assert(Res.Root == N &&
14319 "First element is expected to be the current node");
14320 InputRootReplacement = NewValue;
14321 } else {
14322 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
14323 }
14324 }
14325 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
14326 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
14327 DCI.AddToWorklist(OldNewValues.second.getNode());
14328 }
14329 return InputRootReplacement;
14330}
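// Worked example (a sketch, not part of the original source): given
//   t1 = sext_vl a ; t2 = sext_vl b ; t3 = add_vl t1, t2
// the worklist above matches t3, records one CombineResult (a vwadd_vl of the
// narrow operands), and only then materializes it and rewrites uses. If any
// root in the extension web fails to match, the whole combine is abandoned,
// which is what the "all or nothing" comment above refers to.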
14331
14332// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
14333// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
14334// y will be the Passthru and cond will be the Mask.
14335static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
14336 unsigned Opc = N->getOpcode();
14337 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14338 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14339
14340 SDValue Y = N->getOperand(0);
14341 SDValue MergeOp = N->getOperand(1);
14342 unsigned MergeOpc = MergeOp.getOpcode();
14343
14344 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
14345 return SDValue();
14346
14347 SDValue X = MergeOp->getOperand(1);
14348
14349 if (!MergeOp.hasOneUse())
14350 return SDValue();
14351
14352 // Passthru should be undef
14353 SDValue Passthru = N->getOperand(2);
14354 if (!Passthru.isUndef())
14355 return SDValue();
14356
14357 // Mask should be all ones
14358 SDValue Mask = N->getOperand(3);
14359 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
14360 return SDValue();
14361
14362 // False value of MergeOp should be all zeros
14363 SDValue Z = MergeOp->getOperand(2);
14364
14365 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
14366 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
14367 Z = Z.getOperand(1);
14368
14369 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
14370 return SDValue();
14371
14372 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
14373 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
14374 N->getFlags());
14375}
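// Illustrative example (a sketch, not part of the original source): with an
// all-ones mask and undef passthru,
//   vwadd_w_vl y, (vmerge_vl cond, x, 0)
// becomes a masked vwadd_w_vl with y used as both the passthru and the first
// operand, so lanes where cond is false simply keep y instead of adding zero.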
14376
14377static SDValue performVWADDSUBW_VLCombine(SDNode *N,
14378 TargetLowering::DAGCombinerInfo &DCI,
14379 const RISCVSubtarget &Subtarget) {
14380 [[maybe_unused]] unsigned Opc = N->getOpcode();
14381 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
14382 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
14383
14384 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
14385 return V;
14386
14387 return combineVWADDSUBWSelect(N, DCI.DAG);
14388}
14389
14390// Helper function for performMemPairCombine.
14391// Try to combine the memory loads/stores LSNode1 and LSNode2
14392// into a single memory pair operation.
14393static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
14394 LSBaseSDNode *LSNode2, SDValue BasePtr,
14395 uint64_t Imm) {
14396 SmallPtrSet<const SDNode *, 32> Visited;
14397 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
14398
14399 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
14400 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
14401 return SDValue();
14402
14403 MachineFunction &MF = DAG.getMachineFunction();
14404 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14405
14406 // The new operation has twice the width.
14407 MVT XLenVT = Subtarget.getXLenVT();
14408 EVT MemVT = LSNode1->getMemoryVT();
14409 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
14410 MachineMemOperand *MMO = LSNode1->getMemOperand();
14411 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
14412 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
14413
14414 if (LSNode1->getOpcode() == ISD::LOAD) {
14415 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
14416 unsigned Opcode;
14417 if (MemVT == MVT::i32)
14418 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
14419 else
14420 Opcode = RISCVISD::TH_LDD;
14421
14422 SDValue Res = DAG.getMemIntrinsicNode(
14423 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
14424 {LSNode1->getChain(), BasePtr,
14425 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14426 NewMemVT, NewMMO);
14427
14428 SDValue Node1 =
14429 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
14430 SDValue Node2 =
14431 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
14432
14433 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
14434 return Node1;
14435 } else {
14436 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
14437
14438 SDValue Res = DAG.getMemIntrinsicNode(
14439 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
14440 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
14441 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
14442 NewMemVT, NewMMO);
14443
14444 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
14445 return Res;
14446 }
14447}
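// Illustrative example (a sketch, not part of the original source): on RV64
// with XTHeadMemPair, two adjacent i64 loads from Base+0 and Base+8 are merged
// into one RISCVISD::TH_LDD node that produces both values plus a chain, and
// later selects to a single th.ldd pair-load instruction.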
14448
14449// Try to combine two adjacent loads/stores to a single pair instruction from
14450// the XTHeadMemPair vendor extension.
14451static SDValue performMemPairCombine(SDNode *N,
14452 TargetLowering::DAGCombinerInfo &DCI) {
14453 SelectionDAG &DAG = DCI.DAG;
14454 MachineFunction &MF = DAG.getMachineFunction();
14455 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
14456
14457 // Target does not support load/store pair.
14458 if (!Subtarget.hasVendorXTHeadMemPair())
14459 return SDValue();
14460
14461 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
14462 EVT MemVT = LSNode1->getMemoryVT();
14463 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
14464
14465 // No volatile, indexed or atomic loads/stores.
14466 if (!LSNode1->isSimple() || LSNode1->isIndexed())
14467 return SDValue();
14468
14469 // Function to get a base + constant representation from a memory value.
14470 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
14471 if (Ptr->getOpcode() == ISD::ADD)
14472 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
14473 return {Ptr->getOperand(0), C1->getZExtValue()};
14474 return {Ptr, 0};
14475 };
14476
14477 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
14478
14479 SDValue Chain = N->getOperand(0);
14480 for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
14481 UI != UE; ++UI) {
14482 SDUse &Use = UI.getUse();
14483 if (Use.getUser() != N && Use.getResNo() == 0 &&
14484 Use.getUser()->getOpcode() == N->getOpcode()) {
14485 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
14486
14487 // No volatile, indexed or atomic loads/stores.
14488 if (!LSNode2->isSimple() || LSNode2->isIndexed())
14489 continue;
14490
14491 // Check if LSNode1 and LSNode2 have the same type and extension.
14492 if (LSNode1->getOpcode() == ISD::LOAD)
14493 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
14494 cast<LoadSDNode>(LSNode1)->getExtensionType())
14495 continue;
14496
14497 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
14498 continue;
14499
14500 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
14501
14502 // Check if the base pointer is the same for both instructions.
14503 if (Base1 != Base2)
14504 continue;
14505
14506 // Check if the offsets match the XTHeadMemPair encoding constraints.
14507 bool Valid = false;
14508 if (MemVT == MVT::i32) {
14509 // Check for adjacent i32 values and a 2-bit index.
14510 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
14511 Valid = true;
14512 } else if (MemVT == MVT::i64) {
14513 // Check for adjacent i64 values and a 2-bit index.
14514 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
14515 Valid = true;
14516 }
14517
14518 if (!Valid)
14519 continue;
14520
14521 // Try to combine.
14522 if (SDValue Res =
14523 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
14524 return Res;
14525 }
14526 }
14527
14528 return SDValue();
14529}
14530
14531// Fold
14532// (fp_to_int (froundeven X)) -> fcvt X, rne
14533// (fp_to_int (ftrunc X)) -> fcvt X, rtz
14534// (fp_to_int (ffloor X)) -> fcvt X, rdn
14535// (fp_to_int (fceil X)) -> fcvt X, rup
14536// (fp_to_int (fround X)) -> fcvt X, rmm
14537// (fp_to_int (frint X)) -> fcvt X
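// Illustrative example (a sketch, not part of the original source): on RV64
// with F, (i64 (fp_to_sint (ffloor f32:x))) becomes a RISCVISD::FCVT_X node
// with the static RDN rounding mode, which selects to one fcvt instruction
// using rounding mode rdn instead of a separate floor plus conversion.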
14538static SDValue performFP_TO_INTCombine(SDNode *N,
14539 TargetLowering::DAGCombinerInfo &DCI,
14540 const RISCVSubtarget &Subtarget) {
14541 SelectionDAG &DAG = DCI.DAG;
14542 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14543 MVT XLenVT = Subtarget.getXLenVT();
14544
14545 SDValue Src = N->getOperand(0);
14546
14547 // Don't do this for strict-fp Src.
14548 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14549 return SDValue();
14550
14551 // Ensure the FP type is legal.
14552 if (!TLI.isTypeLegal(Src.getValueType()))
14553 return SDValue();
14554
14555 // Don't do this for f16 with Zfhmin and not Zfh.
14556 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14557 return SDValue();
14558
14559 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14560 // If the result is invalid, we didn't find a foldable instruction.
14561 if (FRM == RISCVFPRndMode::Invalid)
14562 return SDValue();
14563
14564 SDLoc DL(N);
14565 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
14566 EVT VT = N->getValueType(0);
14567
14568 if (VT.isVector() && TLI.isTypeLegal(VT)) {
14569 MVT SrcVT = Src.getSimpleValueType();
14570 MVT SrcContainerVT = SrcVT;
14571 MVT ContainerVT = VT.getSimpleVT();
14572 SDValue XVal = Src.getOperand(0);
14573
14574 // For widening and narrowing conversions we just combine it into a
14575 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
14576 // end up getting lowered to their appropriate pseudo instructions based on
14577 // their operand types
14578 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
14579 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
14580 return SDValue();
14581
14582 // Make fixed-length vectors scalable first
14583 if (SrcVT.isFixedLengthVector()) {
14584 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
14585 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
14586 ContainerVT =
14587 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
14588 }
14589
14590 auto [Mask, VL] =
14591 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
14592
14593 SDValue FpToInt;
14594 if (FRM == RISCVFPRndMode::RTZ) {
14595 // Use the dedicated trunc static rounding mode if we're truncating so we
14596 // don't need to generate calls to fsrmi/fsrm
14597 unsigned Opc =
14598 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
14599 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14600 } else if (FRM == RISCVFPRndMode::DYN) {
14601 unsigned Opc =
14602 IsSigned ? RISCVISD::VFCVT_X_F_VL : RISCVISD::VFCVT_XU_F_VL;
14603 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
14604 } else {
14605 unsigned Opc =
14606 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
14607 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
14608 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
14609 }
14610
14611 // If converted from fixed-length to scalable, convert back
14612 if (VT.isFixedLengthVector())
14613 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
14614
14615 return FpToInt;
14616 }
14617
14618 // Only handle XLen or i32 types. Other types narrower than XLen will
14619 // eventually be legalized to XLenVT.
14620 if (VT != MVT::i32 && VT != XLenVT)
14621 return SDValue();
14622
14623 unsigned Opc;
14624 if (VT == XLenVT)
14625 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14626 else
14627 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14628
14629 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
14630 DAG.getTargetConstant(FRM, DL, XLenVT));
14631 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
14632}
14633
14634// Fold
14635// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
14636// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
14637// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
14638// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
14639// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
14640// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
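// Note (an explanatory sketch, not part of the original source): the fcvt
// instructions already saturate on overflow, so the only extra work for the
// _SAT variants is the NaN case; the combine below compares the source with
// itself using SETUO and selects 0 when the input is NaN.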
14641static SDValue performFP_TO_INT_SATCombine(SDNode *N,
14642 TargetLowering::DAGCombinerInfo &DCI,
14643 const RISCVSubtarget &Subtarget) {
14644 SelectionDAG &DAG = DCI.DAG;
14645 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14646 MVT XLenVT = Subtarget.getXLenVT();
14647
14648 // Only handle XLen types. Other types narrower than XLen will eventually be
14649 // legalized to XLenVT.
14650 EVT DstVT = N->getValueType(0);
14651 if (DstVT != XLenVT)
14652 return SDValue();
14653
14654 SDValue Src = N->getOperand(0);
14655
14656 // Don't do this for strict-fp Src.
14657 if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
14658 return SDValue();
14659
14660 // Ensure the FP type is also legal.
14661 if (!TLI.isTypeLegal(Src.getValueType()))
14662 return SDValue();
14663
14664 // Don't do this for f16 with Zfhmin and not Zfh.
14665 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
14666 return SDValue();
14667
14668 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14669
14670 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
14671 if (FRM == RISCVFPRndMode::Invalid)
14672 return SDValue();
14673
14674 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
14675
14676 unsigned Opc;
14677 if (SatVT == DstVT)
14678 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
14679 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
14680 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14681 else
14682 return SDValue();
14683 // FIXME: Support other SatVTs by clamping before or after the conversion.
14684
14685 Src = Src.getOperand(0);
14686
14687 SDLoc DL(N);
14688 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
14689 DAG.getTargetConstant(FRM, DL, XLenVT));
14690
14691 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
14692 // extend.
14693 if (Opc == RISCVISD::FCVT_WU_RV64)
14694 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
14695
14696 // RISC-V FP-to-int conversions saturate to the destination register size, but
14697 // don't produce 0 for nan.
14698 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
14699 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
14700}
14701
14702// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
14703// smaller than XLenVT.
14704static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
14705 const RISCVSubtarget &Subtarget) {
14706 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
14707
14708 SDValue Src = N->getOperand(0);
14709 if (Src.getOpcode() != ISD::BSWAP)
14710 return SDValue();
14711
14712 EVT VT = N->getValueType(0);
14713 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
14714 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
14715 return SDValue();
14716
14717 SDLoc DL(N);
14718 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
14719}
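// Illustrative example (a sketch, not part of the original source): for i16,
// (bitreverse (bswap x)) reverses the bits within each byte while keeping the
// byte order, which is exactly the semantics of a single brev8 instruction
// from Zbkb, so the pair collapses to RISCVISD::BREV8.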
14720
14721// Convert from one FMA opcode to another based on whether we are negating the
14722// multiply result and/or the accumulator.
14723// NOTE: Only supports RVV operations with VL.
14724static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
14725 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
14726 if (NegMul) {
14727 // clang-format off
14728 switch (Opcode) {
14729 default: llvm_unreachable("Unexpected opcode");
14730 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14731 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14732 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14733 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14734 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14735 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14736 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14737 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14738 }
14739 // clang-format on
14740 }
14741
14742 // Negating the accumulator changes ADD<->SUB.
14743 if (NegAcc) {
14744 // clang-format off
14745 switch (Opcode) {
14746 default: llvm_unreachable("Unexpected opcode");
14747 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
14748 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
14749 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
14750 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
14751 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
14752 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
14753 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
14754 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
14755 }
14756 // clang-format on
14757 }
14758
14759 return Opcode;
14760}
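// Illustrative example (a sketch, not part of the original source):
//   negateFMAOpcode(RISCVISD::VFMADD_VL, /*NegMul=*/true, /*NegAcc=*/false)
// yields RISCVISD::VFNMSUB_VL, since -(a * b) + c is what vfnmsub computes.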
14761
14762static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
14763 // Fold FNEG_VL into FMA opcodes.
14764 // The first operand of strict-fp is chain.
14765 unsigned Offset = N->isTargetStrictFPOpcode();
14766 SDValue A = N->getOperand(0 + Offset);
14767 SDValue B = N->getOperand(1 + Offset);
14768 SDValue C = N->getOperand(2 + Offset);
14769 SDValue Mask = N->getOperand(3 + Offset);
14770 SDValue VL = N->getOperand(4 + Offset);
14771
14772 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
14773 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
14774 V.getOperand(2) == VL) {
14775 // Return the negated input.
14776 V = V.getOperand(0);
14777 return true;
14778 }
14779
14780 return false;
14781 };
14782
14783 bool NegA = invertIfNegative(A);
14784 bool NegB = invertIfNegative(B);
14785 bool NegC = invertIfNegative(C);
14786
14787 // If no operands are negated, we're done.
14788 if (!NegA && !NegB && !NegC)
14789 return SDValue();
14790
14791 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
14792 if (N->isTargetStrictFPOpcode())
14793 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
14794 {N->getOperand(0), A, B, C, Mask, VL});
14795 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
14796 VL);
14797}
14798
14799static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG,
14800 const RISCVSubtarget &Subtarget) {
14801 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
14802 return V;
14803
14804 if (N->getValueType(0).isScalableVector() &&
14805 N->getValueType(0).getVectorElementType() == MVT::f32 &&
14806 (Subtarget.hasVInstructionsF16Minimal() &&
14807 !Subtarget.hasVInstructionsF16())) {
14808 return SDValue();
14809 }
14810
14811 // FIXME: Ignore strict opcodes for now.
14812 if (N->isTargetStrictFPOpcode())
14813 return SDValue();
14814
14815 // Try to form widening FMA.
14816 SDValue Op0 = N->getOperand(0);
14817 SDValue Op1 = N->getOperand(1);
14818 SDValue Mask = N->getOperand(3);
14819 SDValue VL = N->getOperand(4);
14820
14821 if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
14822 Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
14823 return SDValue();
14824
14825 // TODO: Refactor to handle more complex cases similar to
14826 // combineBinOp_VLToVWBinOp_VL.
14827 if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
14828 (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
14829 return SDValue();
14830
14831 // Check the mask and VL are the same.
14832 if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
14833 Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
14834 return SDValue();
14835
14836 unsigned NewOpc;
14837 switch (N->getOpcode()) {
14838 default:
14839 llvm_unreachable("Unexpected opcode");
14840 case RISCVISD::VFMADD_VL:
14841 NewOpc = RISCVISD::VFWMADD_VL;
14842 break;
14843 case RISCVISD::VFNMSUB_VL:
14844 NewOpc = RISCVISD::VFWNMSUB_VL;
14845 break;
14846 case RISCVISD::VFNMADD_VL:
14847 NewOpc = RISCVISD::VFWNMADD_VL;
14848 break;
14849 case RISCVISD::VFMSUB_VL:
14850 NewOpc = RISCVISD::VFWMSUB_VL;
14851 break;
14852 }
14853
14854 Op0 = Op0.getOperand(0);
14855 Op1 = Op1.getOperand(0);
14856
14857 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
14858 N->getOperand(2), Mask, VL);
14859}
14860
14861static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
14862 const RISCVSubtarget &Subtarget) {
14863 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
14864
14865 if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
14866 return SDValue();
14867
14868 if (!isa<ConstantSDNode>(N->getOperand(1)))
14869 return SDValue();
14870 uint64_t ShAmt = N->getConstantOperandVal(1);
14871 if (ShAmt > 32)
14872 return SDValue();
14873
14874 SDValue N0 = N->getOperand(0);
14875
14876 // Combine (sra (sext_inreg (shl X, C1), i32), C2) ->
14877 // (sra (shl X, C1+32), C2+32) so it gets selected as SLLI+SRAI instead of
14878 // SLLIW+SRAIW. SLLI+SRAI have compressed forms.
14879 if (ShAmt < 32 &&
14880 N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse() &&
14881 cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32 &&
14882 N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).hasOneUse() &&
14883 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
14884 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
14885 if (LShAmt < 32) {
14886 SDLoc ShlDL(N0.getOperand(0));
14887 SDValue Shl = DAG.getNode(ISD::SHL, ShlDL, MVT::i64,
14888 N0.getOperand(0).getOperand(0),
14889 DAG.getConstant(LShAmt + 32, ShlDL, MVT::i64));
14890 SDLoc DL(N);
14891 return DAG.getNode(ISD::SRA, DL, MVT::i64, Shl,
14892 DAG.getConstant(ShAmt + 32, DL, MVT::i64));
14893 }
14894 }
14895
14896 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
14897 // FIXME: Should this be a generic combine? There's a similar combine on X86.
14898 //
14899 // Also try these folds where an add or sub is in the middle.
14900 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
14901 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
14902 SDValue Shl;
14903 ConstantSDNode *AddC = nullptr;
14904
14905 // We might have an ADD or SUB between the SRA and SHL.
14906 bool IsAdd = N0.getOpcode() == ISD::ADD;
14907 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
14908 // Other operand needs to be a constant we can modify.
14909 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
14910 if (!AddC)
14911 return SDValue();
14912
14913 // AddC needs to have at least 32 trailing zeros.
14914 if (AddC->getAPIntValue().countr_zero() < 32)
14915 return SDValue();
14916
14917 // All users should be a shift by constant less than or equal to 32. This
14918 // ensures we'll do this optimization for each of them to produce an
14919 // add/sub+sext_inreg they can all share.
14920 for (SDNode *U : N0->uses()) {
14921 if (U->getOpcode() != ISD::SRA ||
14922 !isa<ConstantSDNode>(U->getOperand(1)) ||
14923 U->getConstantOperandVal(1) > 32)
14924 return SDValue();
14925 }
14926
14927 Shl = N0.getOperand(IsAdd ? 0 : 1);
14928 } else {
14929 // Not an ADD or SUB.
14930 Shl = N0;
14931 }
14932
14933 // Look for a shift left by 32.
14934 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
14935 Shl.getConstantOperandVal(1) != 32)
14936 return SDValue();
14937
14938 // If we didn't look through an add/sub, then the shl should have one use.
14939 // If we did look through an add/sub, the sext_inreg we create is free so
14940 // we're only creating 2 new instructions. It's enough to only remove the
14941 // original sra+add/sub.
14942 if (!AddC && !Shl.hasOneUse())
14943 return SDValue();
14944
14945 SDLoc DL(N);
14946 SDValue In = Shl.getOperand(0);
14947
14948 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
14949 // constant.
14950 if (AddC) {
14951 SDValue ShiftedAddC =
14952 DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
14953 if (IsAdd)
14954 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
14955 else
14956 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
14957 }
14958
14959 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
14960 DAG.getValueType(MVT::i32));
14961 if (ShAmt == 32)
14962 return SExt;
14963
14964 return DAG.getNode(
14965 ISD::SHL, DL, MVT::i64, SExt,
14966 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
14967}
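// Worked example (a sketch, not part of the original source): with ShAmt = 29,
//   (sra (shl X, 32), 29)
// becomes (shl (sext_inreg X, i32), 3), i.e. a sign-extend-in-register
// followed by a small left shift, avoiding the wide shifted intermediate.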
14968
14969 // Invert (and/or (setcc X, Y), (xor Z, 1)) to (or/and (setcc X, Y, !cc), Z) if
14970 // the result is used as the condition of a br_cc or select_cc we can invert,
14971// inverting the setcc is free, and Z is 0/1. Caller will invert the
14972// br_cc/select_cc.
14973static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
14974 bool IsAnd = Cond.getOpcode() == ISD::AND;
14975 if (!IsAnd && Cond.getOpcode() != ISD::OR)
14976 return SDValue();
14977
14978 if (!Cond.hasOneUse())
14979 return SDValue();
14980
14981 SDValue Setcc = Cond.getOperand(0);
14982 SDValue Xor = Cond.getOperand(1);
14983 // Canonicalize setcc to LHS.
14984 if (Setcc.getOpcode() != ISD::SETCC)
14985 std::swap(Setcc, Xor);
14986 // LHS should be a setcc and RHS should be an xor.
14987 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
14988 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
14989 return SDValue();
14990
14991 // If the condition is an And, SimplifyDemandedBits may have changed
14992 // (xor Z, 1) to (not Z).
14993 SDValue Xor1 = Xor.getOperand(1);
14994 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
14995 return SDValue();
14996
14997 EVT VT = Cond.getValueType();
14998 SDValue Xor0 = Xor.getOperand(0);
14999
15000 // The LHS of the xor needs to be 0/1.
15001 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15002 if (!DAG.MaskedValueIsZero(Xor0, Mask))
15003 return SDValue();
15004
15005 // We can only invert integer setccs.
15006 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
15007 if (!SetCCOpVT.isScalarInteger())
15008 return SDValue();
15009
15010 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
15011 if (ISD::isIntEqualitySetCC(CCVal)) {
15012 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15013 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
15014 Setcc.getOperand(1), CCVal);
15015 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
15016 // Invert (setlt 0, X) by converting to (setlt X, 1).
15017 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
15018 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
15019 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
15020 // Invert (setlt X, 1) by converting to (setlt 0, X).
15021 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
15022 DAG.getConstant(0, SDLoc(Setcc), VT),
15023 Setcc.getOperand(0), CCVal);
15024 } else
15025 return SDValue();
15026
15027 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
15028 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
15029}
15030
15031 // Perform common combines for BR_CC and SELECT_CC conditions.
15032static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
15033 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
15034 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
15035
15036 // Since an arithmetic right shift always preserves the sign bit,
15037 // the shift can be omitted.
15038 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
15039 // setge (sra X, N), 0 -> setge X, 0
15040 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
15041 LHS.getOpcode() == ISD::SRA) {
15042 LHS = LHS.getOperand(0);
15043 return true;
15044 }
15045
15046 if (!ISD::isIntEqualitySetCC(CCVal))
15047 return false;
15048
15049 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
15050 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
15051 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
15052 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
15053 // If we're looking for eq 0 instead of ne 0, we need to invert the
15054 // condition.
15055 bool Invert = CCVal == ISD::SETEQ;
15056 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
15057 if (Invert)
15058 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15059
15060 RHS = LHS.getOperand(1);
15061 LHS = LHS.getOperand(0);
15062 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
15063
15064 CC = DAG.getCondCode(CCVal);
15065 return true;
15066 }
15067
15068 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
15069 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
15070 RHS = LHS.getOperand(1);
15071 LHS = LHS.getOperand(0);
15072 return true;
15073 }
15074
15075 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
15076 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
15077 LHS.getOperand(1).getOpcode() == ISD::Constant) {
15078 SDValue LHS0 = LHS.getOperand(0);
15079 if (LHS0.getOpcode() == ISD::AND &&
15080 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
15081 uint64_t Mask = LHS0.getConstantOperandVal(1);
15082 uint64_t ShAmt = LHS.getConstantOperandVal(1);
15083 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
15084 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
15085 CC = DAG.getCondCode(CCVal);
15086
15087 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
15088 LHS = LHS0.getOperand(0);
15089 if (ShAmt != 0)
15090 LHS =
15091 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
15092 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
15093 return true;
15094 }
15095 }
15096 }
15097
15098 // Fold (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
15099 // This can occur when legalizing some floating point comparisons.
15100 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
15101 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
15102 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15103 CC = DAG.getCondCode(CCVal);
15104 RHS = DAG.getConstant(0, DL, LHS.getValueType());
15105 return true;
15106 }
15107
15108 if (isNullConstant(RHS)) {
15109 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
15110 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
15111 CC = DAG.getCondCode(CCVal);
15112 LHS = NewCond;
15113 return true;
15114 }
15115 }
15116
15117 return false;
15118}
15119
15120// Fold
15121// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
15122// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
15123// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
15124// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
15125static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
15126 SDValue TrueVal, SDValue FalseVal,
15127 bool Swapped) {
15128 bool Commutative = true;
15129 unsigned Opc = TrueVal.getOpcode();
15130 switch (Opc) {
15131 default:
15132 return SDValue();
15133 case ISD::SHL:
15134 case ISD::SRA:
15135 case ISD::SRL:
15136 case ISD::SUB:
15137 Commutative = false;
15138 break;
15139 case ISD::ADD:
15140 case ISD::OR:
15141 case ISD::XOR:
15142 break;
15143 }
15144
15145 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
15146 return SDValue();
15147
15148 unsigned OpToFold;
15149 if (FalseVal == TrueVal.getOperand(0))
15150 OpToFold = 0;
15151 else if (Commutative && FalseVal == TrueVal.getOperand(1))
15152 OpToFold = 1;
15153 else
15154 return SDValue();
15155
15156 EVT VT = N->getValueType(0);
15157 SDLoc DL(N);
15158 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
15159 EVT OtherOpVT = OtherOp->getValueType(0);
15160 SDValue IdentityOperand =
15161 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
15162 if (!Commutative)
15163 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
15164 assert(IdentityOperand && "No identity operand!");
15165
15166 if (Swapped)
15167 std::swap(OtherOp, IdentityOperand);
15168 SDValue NewSel =
15169 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
15170 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
15171}
15172
15173// This tries to get rid of `select` and `icmp` that are being used to handle
15174// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
15175static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
15176 SDValue Cond = N->getOperand(0);
15177
15178 // This represents either CTTZ or CTLZ instruction.
15179 SDValue CountZeroes;
15180
15181 SDValue ValOnZero;
15182
15183 if (Cond.getOpcode() != ISD::SETCC)
15184 return SDValue();
15185
15186 if (!isNullConstant(Cond->getOperand(1)))
15187 return SDValue();
15188
15189 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
15190 if (CCVal == ISD::CondCode::SETEQ) {
15191 CountZeroes = N->getOperand(2);
15192 ValOnZero = N->getOperand(1);
15193 } else if (CCVal == ISD::CondCode::SETNE) {
15194 CountZeroes = N->getOperand(1);
15195 ValOnZero = N->getOperand(2);
15196 } else {
15197 return SDValue();
15198 }
15199
15200 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
15201 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
15202 CountZeroes = CountZeroes.getOperand(0);
15203
15204 if (CountZeroes.getOpcode() != ISD::CTTZ &&
15205 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
15206 CountZeroes.getOpcode() != ISD::CTLZ &&
15207 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
15208 return SDValue();
15209
15210 if (!isNullConstant(ValOnZero))
15211 return SDValue();
15212
15213 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
15214 if (Cond->getOperand(0) != CountZeroesArgument)
15215 return SDValue();
15216
15217 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
15218 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
15219 CountZeroes.getValueType(), CountZeroesArgument);
15220 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
15221 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
15222 CountZeroes.getValueType(), CountZeroesArgument);
15223 }
15224
15225 unsigned BitWidth = CountZeroes.getValueSizeInBits();
15226 SDValue BitWidthMinusOne =
15227 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
15228
15229 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
15230 CountZeroes, BitWidthMinusOne);
15231 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
15232}
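// Illustrative example (a sketch, not part of the original source): for i32,
//   (select (x == 0), 0, (cttz x))
// becomes (cttz x) & 31: cttz returns the bit width (32) for a zero input, and
// masking with 31 maps that to the required 0 while leaving every other result
// (all of which are < 32) unchanged.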
15233
15234static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
15235 const RISCVSubtarget &Subtarget) {
15236 SDValue Cond = N->getOperand(0);
15237 SDValue True = N->getOperand(1);
15238 SDValue False = N->getOperand(2);
15239 SDLoc DL(N);
15240 EVT VT = N->getValueType(0);
15241 EVT CondVT = Cond.getValueType();
15242
15243 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
15244 return SDValue();
15245
15246 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
15247 // BEXTI, where C is a power of 2.
15248 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
15249 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
15250 SDValue LHS = Cond.getOperand(0);
15251 SDValue RHS = Cond.getOperand(1);
15252 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
15253 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
15254 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
15255 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
15256 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
15257 return DAG.getSelect(DL, VT,
15258 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
15259 False, True);
15260 }
15261 }
15262 return SDValue();
15263}
15264
15265static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
15266 const RISCVSubtarget &Subtarget) {
15267 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
15268 return Folded;
15269
15270 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
15271 return V;
15272
15273 if (Subtarget.hasConditionalMoveFusion())
15274 return SDValue();
15275
15276 SDValue TrueVal = N->getOperand(1);
15277 SDValue FalseVal = N->getOperand(2);
15278 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
15279 return V;
15280 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
15281}
15282
15283/// If we have a build_vector where each lane is binop X, C, where C
15284/// is a constant (but not necessarily the same constant on all lanes),
15285/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
15286/// We assume that materializing a constant build vector will be no more
15287 /// expensive than performing O(n) binops.
15288static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
15289 const RISCVSubtarget &Subtarget,
15290 const RISCVTargetLowering &TLI) {
15291 SDLoc DL(N);
15292 EVT VT = N->getValueType(0);
15293
15294 assert(!VT.isScalableVector() && "unexpected build vector");
15295
15296 if (VT.getVectorNumElements() == 1)
15297 return SDValue();
15298
15299 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
15300 if (!TLI.isBinOp(Opcode))
15301 return SDValue();
15302
15303 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
15304 return SDValue();
15305
15306 // This BUILD_VECTOR involves an implicit truncation, and sinking
15307 // truncates through binops is non-trivial.
15308 if (N->op_begin()->getValueType() != VT.getVectorElementType())
15309 return SDValue();
15310
15311 SmallVector<SDValue> LHSOps;
15312 SmallVector<SDValue> RHSOps;
15313 for (SDValue Op : N->ops()) {
15314 if (Op.isUndef()) {
15315 // We can't form a divide or remainder from undef.
15316 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
15317 return SDValue();
15318
15319 LHSOps.push_back(Op);
15320 RHSOps.push_back(Op);
15321 continue;
15322 }
15323
15324 // TODO: We can handle operations which have a neutral rhs value
15325 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
15326 // of profit in a more explicit manner.
15327 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
15328 return SDValue();
15329
15330 LHSOps.push_back(Op.getOperand(0));
15331 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
15332 !isa<ConstantFPSDNode>(Op.getOperand(1)))
15333 return SDValue();
15334 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15335 // have different LHS and RHS types.
15336 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
15337 return SDValue();
15338
15339 RHSOps.push_back(Op.getOperand(1));
15340 }
15341
15342 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
15343 DAG.getBuildVector(VT, DL, RHSOps));
15344}
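// Illustrative example (a sketch, not part of the original source):
//   build_vector (add a, 1), (add b, 2), (add c, 3), (add d, 4)
// becomes add (build_vector a, b, c, d), (build_vector 1, 2, 3, 4), trading
// O(n) scalar binops for a single vector binop plus one constant vector.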
15345
15346static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
15347 const RISCVSubtarget &Subtarget,
15348 const RISCVTargetLowering &TLI) {
15349 SDValue InVec = N->getOperand(0);
15350 SDValue InVal = N->getOperand(1);
15351 SDValue EltNo = N->getOperand(2);
15352 SDLoc DL(N);
15353
15354 EVT VT = InVec.getValueType();
15355 if (VT.isScalableVector())
15356 return SDValue();
15357
15358 if (!InVec.hasOneUse())
15359 return SDValue();
15360
15361 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
15362 // move the insert_vector_elts into the arms of the binop. Note that
15363 // the new RHS must be a constant.
15364 const unsigned InVecOpcode = InVec->getOpcode();
15365 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
15366 InVal.hasOneUse()) {
15367 SDValue InVecLHS = InVec->getOperand(0);
15368 SDValue InVecRHS = InVec->getOperand(1);
15369 SDValue InValLHS = InVal->getOperand(0);
15370 SDValue InValRHS = InVal->getOperand(1);
15371
15372 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
15373 return SDValue();
15374 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
15375 return SDValue();
15376 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
15377 // have different LHS and RHS types.
15378 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
15379 return SDValue();
15380 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15381 InVecLHS, InValLHS, EltNo);
15382 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15383 InVecRHS, InValRHS, EltNo);
15384 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
15385 }
15386
15387 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
15388 // move the insert_vector_elt to the source operand of the concat_vector.
15389 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
15390 return SDValue();
15391
15392 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15393 if (!IndexC)
15394 return SDValue();
15395 unsigned Elt = IndexC->getZExtValue();
15396
15397 EVT ConcatVT = InVec.getOperand(0).getValueType();
15398 if (ConcatVT.getVectorElementType() != InVal.getValueType())
15399 return SDValue();
15400 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
15401 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
15402
15403 unsigned ConcatOpIdx = Elt / ConcatNumElts;
15404 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
15405 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
15406 ConcatOp, InVal, NewIdx);
15407
15408 SmallVector<SDValue> ConcatOps;
15409 ConcatOps.append(InVec->op_begin(), InVec->op_end());
15410 ConcatOps[ConcatOpIdx] = ConcatOp;
15411 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15412}
15413
15414// If we're concatenating a series of vector loads like
15415// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
15416// Then we can turn this into a strided load by widening the vector elements
15417// vlse32 p, stride=n
15418static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
15419 const RISCVSubtarget &Subtarget,
15420 const RISCVTargetLowering &TLI) {
15421 SDLoc DL(N);
15422 EVT VT = N->getValueType(0);
15423
15424 // Only perform this combine on legal MVTs.
15425 if (!TLI.isTypeLegal(VT))
15426 return SDValue();
15427
15428 // TODO: Potentially extend this to scalable vectors
15429 if (VT.isScalableVector())
15430 return SDValue();
15431
15432 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
15433 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
15434 !SDValue(BaseLd, 0).hasOneUse())
15435 return SDValue();
15436
15437 EVT BaseLdVT = BaseLd->getValueType(0);
15438
15439 // Go through the loads and check that they're strided
15440 SmallVector<LoadSDNode *> Lds;
15441 Lds.push_back(BaseLd);
15442 Align Align = BaseLd->getAlign();
15443 for (SDValue Op : N->ops().drop_front()) {
15444 auto *Ld = dyn_cast<LoadSDNode>(Op);
15445 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
15446 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
15447 Ld->getValueType(0) != BaseLdVT)
15448 return SDValue();
15449
15450 Lds.push_back(Ld);
15451
15452 // The common alignment is the most restrictive (smallest) of all the loads
15453 Align = std::min(Align, Ld->getAlign());
15454 }
15455
15456 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
15457 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
15458 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
15459 // If the load ptrs can be decomposed into a common (Base + Index) with a
15460 // common constant stride, then return the constant stride.
15461 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
15462 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
15463 if (BIO1.equalBaseIndex(BIO2, DAG))
15464 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
15465
15466 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
15467 SDValue P1 = Ld1->getBasePtr();
15468 SDValue P2 = Ld2->getBasePtr();
15469 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
15470 return {{P2.getOperand(1), false}};
15471 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
15472 return {{P1.getOperand(1), true}};
15473
15474 return std::nullopt;
15475 };
15476
15477 // Get the distance between the first and second loads
15478 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
15479 if (!BaseDiff)
15480 return SDValue();
15481
15482 // Check all the loads are the same distance apart
15483 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
15484 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
15485 return SDValue();
15486
15487 // TODO: At this point, we've successfully matched a generalized gather
15488 // load. Maybe we should emit that, and then move the specialized
15489 // matchers above and below into a DAG combine?
15490
15491 // Get the widened scalar type, e.g. v4i8 -> i32
15492 unsigned WideScalarBitWidth =
15493 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
15494 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
15495
15496 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i32
15497 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
15498 if (!TLI.isTypeLegal(WideVecVT))
15499 return SDValue();
15500
15501 // Check that the operation is legal
15502 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
15503 return SDValue();
15504
15505 auto [StrideVariant, MustNegateStride] = *BaseDiff;
15506 SDValue Stride = std::holds_alternative<SDValue>(StrideVariant)
15507 ? std::get<SDValue>(StrideVariant)
15508 : DAG.getConstant(std::get<int64_t>(StrideVariant), DL,
15509 Lds[0]->getOffset().getValueType());
15510 if (MustNegateStride)
15511 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
15512
15513 SDVTList VTs = DAG.getVTList({WideVecVT, MVT::Other});
15514 SDValue IntID =
15515 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
15516 Subtarget.getXLenVT());
15517
15518 SDValue AllOneMask =
15519 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
15520 DAG.getConstant(1, DL, MVT::i1));
15521
15522 SDValue Ops[] = {BaseLd->getChain(), IntID, DAG.getUNDEF(WideVecVT),
15523 BaseLd->getBasePtr(), Stride, AllOneMask};
15524
15525 uint64_t MemSize;
15526 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
15527 ConstStride && ConstStride->getSExtValue() >= 0)
15528 // total size = (elsize * n) + (stride - elsize) * (n-1)
15529 // = elsize + stride * (n-1)
15530 MemSize = WideScalarVT.getSizeInBits() +
15531 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
15532 else
15533 // If Stride isn't constant, then we can't know how much it will load
15534 MemSize = MemoryLocation::UnknownSize;
15535
15536 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
15537 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
15538 Align);
15539
15540 SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
15541 Ops, WideVecVT, MMO);
15542 for (SDValue Ld : N->ops())
15543 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
15544
15545 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
15546}
15547
15548static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
15549 const RISCVSubtarget &Subtarget) {
15550
15551 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
15552
15553 if (N->getValueType(0).isFixedLengthVector())
15554 return SDValue();
15555
15556 SDValue Addend = N->getOperand(0);
15557 SDValue MulOp = N->getOperand(1);
15558
15559 if (N->getOpcode() == RISCVISD::ADD_VL) {
15560 SDValue AddMergeOp = N->getOperand(2);
15561 if (!AddMergeOp.isUndef())
15562 return SDValue();
15563 }
15564
15565 auto IsVWMulOpc = [](unsigned Opc) {
15566 switch (Opc) {
15567 case RISCVISD::VWMUL_VL:
15568 case RISCVISD::VWMULU_VL:
15569 case RISCVISD::VWMULSU_VL:
15570 return true;
15571 default:
15572 return false;
15573 }
15574 };
15575
15576 if (!IsVWMulOpc(MulOp.getOpcode()))
15577 std::swap(Addend, MulOp);
15578
15579 if (!IsVWMulOpc(MulOp.getOpcode()))
15580 return SDValue();
15581
15582 SDValue MulMergeOp = MulOp.getOperand(2);
15583
15584 if (!MulMergeOp.isUndef())
15585 return SDValue();
15586
15587 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
15588 const RISCVSubtarget &Subtarget) {
15589 if (N->getOpcode() == ISD::ADD) {
15590 SDLoc DL(N);
15591 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
15592 Subtarget);
15593 }
15594 return std::make_pair(N->getOperand(3), N->getOperand(4));
15595 }(N, DAG, Subtarget);
15596
15597 SDValue MulMask = MulOp.getOperand(3);
15598 SDValue MulVL = MulOp.getOperand(4);
15599
15600 if (AddMask != MulMask || AddVL != MulVL)
15601 return SDValue();
15602
15603 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
15604 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
15605 "Unexpected opcode after VWMACC_VL");
15606 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
15607 "Unexpected opcode after VWMACC_VL!");
15608 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
15609 "Unexpected opcode after VWMUL_VL!");
15610 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
15611 "Unexpected opcode after VWMUL_VL!");
15612
15613 SDLoc DL(N);
15614 EVT VT = N->getValueType(0);
15615 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
15616 AddVL};
15617 return DAG.getNode(Opc, DL, VT, Ops);
15618}
15619
15620static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
15621 ISD::MemIndexType &IndexType,
15622 RISCVTargetLowering::DAGCombinerInfo &DCI) {
15623 if (!DCI.isBeforeLegalize())
15624 return false;
15625
15626 SelectionDAG &DAG = DCI.DAG;
15627 const MVT XLenVT =
15628 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
15629
15630 const EVT IndexVT = Index.getValueType();
15631
15632 // RISC-V indexed loads only support the "unsigned unscaled" addressing
15633 // mode, so anything else must be manually legalized.
15634 if (!isIndexTypeSigned(IndexType))
15635 return false;
15636
15637 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
15638 // Any index legalization should first promote to XLenVT, so we don't lose
15639 // bits when scaling. This may create an illegal index type so we let
15640 // LLVM's legalization take care of the splitting.
15641 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
15642 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
15643 IndexVT.changeVectorElementType(XLenVT), Index);
15644 }
15645 IndexType = ISD::UNSIGNED_SCALED;
15646 return true;
15647}
15648
15649/// Match the index vector of a scatter or gather node as the shuffle mask
15650/// which performs the rearrangement if possible. Will only match if
15651/// all lanes are touched, and thus replacing the scatter or gather with
15652/// a unit strided access and shuffle is legal.
15653static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
15654 SmallVector<int> &ShuffleMask) {
15655 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15656 return false;
15657 if (Index.getOpcode() != ISD::BUILD_VECTOR)
15658 return false;
15659
15660 const unsigned ElementSize = VT.getScalarStoreSize();
15661 const unsigned NumElems = VT.getVectorNumElements();
15662
15663 // Create the shuffle mask and check that all lanes are active
15664 assert(ShuffleMask.empty());
15665 BitVector ActiveLanes(NumElems);
15666 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15667 // TODO: We've found an active bit of UB, and could be
15668 // more aggressive here if desired.
15669 if (Index->getOperand(i)->isUndef())
15670 return false;
15671 uint64_t C = Index->getConstantOperandVal(i);
15672 if (C % ElementSize != 0)
15673 return false;
15674 C = C / ElementSize;
15675 if (C >= NumElems)
15676 return false;
15677 ShuffleMask.push_back(C);
15678 ActiveLanes.set(C);
15679 }
15680 return ActiveLanes.all();
15681}
15682
15683/// Match the index of a gather or scatter operation as an operation
15684/// with twice the element width and half the number of elements. This is
15685/// generally profitable (if legal) because these operations are linear
15686 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
15687/// come out ahead.
15688 static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
15689 Align BaseAlign, const RISCVSubtarget &ST) {
15690 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
15691 return false;
15692 if (Index.getOpcode() != ISD::BUILD_VECTOR)
15693 return false;
15694
15695 // Attempt a doubling. If we can use an element type 4x or 8x in
15696 // size, this will happen via multiple iterations of the transform.
15697 const unsigned NumElems = VT.getVectorNumElements();
15698 if (NumElems % 2 != 0)
15699 return false;
15700
15701 const unsigned ElementSize = VT.getScalarStoreSize();
15702 const unsigned WiderElementSize = ElementSize * 2;
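// The doubled element must still fit within ELEN, and the base pointer must
// be aligned for it unless fast unaligned accesses are available.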
15703 if (WiderElementSize > ST.getELen()/8)
15704 return false;
15705
15706 if (!ST.hasFastUnalignedAccess() && BaseAlign < WiderElementSize)
15707 return false;
15708
15709 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
15710 // TODO: We've found an active bit of UB, and could be
15711 // more aggressive here if desired.
15712 if (Index->getOperand(i)->isUndef())
15713 return false;
15714 // TODO: This offset check is too strict if we support fully
15715 // misaligned memory operations.
15716 uint64_t C = Index->getConstantOperandVal(i);
15717 if (i % 2 == 0) {
15718 if (C % WiderElementSize != 0)
15719 return false;
15720 continue;
15721 }
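// Odd-numbered lanes must be contiguous with the preceding even lane so the
// pair can be loaded as a single element of twice the width.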
15722 uint64_t Last = Index->getConstantOperandVal(i-1);
15723 if (C != Last + ElementSize)
15724 return false;
15725 }
15726 return true;
15727}
15728
15729
15730 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
15731 DAGCombinerInfo &DCI) const {
15732 SelectionDAG &DAG = DCI.DAG;
15733 const MVT XLenVT = Subtarget.getXLenVT();
15734 SDLoc DL(N);
15735
15736 // Helper to call SimplifyDemandedBits on an operand of N where only some low
15737 // bits are demanded. N will be added to the Worklist if it was not deleted.
15738 // Caller should return SDValue(N, 0) if this returns true.
15739 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
15740 SDValue Op = N->getOperand(OpNo);
15741 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
15742 if (!SimplifyDemandedBits(Op, Mask, DCI))
15743 return false;
15744
15745 if (N->getOpcode() != ISD::DELETED_NODE)
15746 DCI.AddToWorklist(N);
15747 return true;
15748 };
15749
15750 switch (N->getOpcode()) {
15751 default:
15752 break;
15753 case RISCVISD::SplitF64: {
15754 SDValue Op0 = N->getOperand(0);
15755 // If the input to SplitF64 is just BuildPairF64 then the operation is
15756 // redundant. Instead, use BuildPairF64's operands directly.
15757 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
15758 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
15759
15760 if (Op0->isUndef()) {
15761 SDValue Lo = DAG.getUNDEF(MVT::i32);
15762 SDValue Hi = DAG.getUNDEF(MVT::i32);
15763 return DCI.CombineTo(N, Lo, Hi);
15764 }
15765
15766 // It's cheaper to materialise two 32-bit integers than to load a double
15767 // from the constant pool and transfer it to integer registers through the
15768 // stack.
15769 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
15770 APInt V = C->getValueAPF().bitcastToAPInt();
15771 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
15772 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
15773 return DCI.CombineTo(N, Lo, Hi);
15774 }
15775
15776 // This is a target-specific version of a DAGCombine performed in
15777 // DAGCombiner::visitBITCAST. It performs the equivalent of:
15778 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15779 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15780 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
15781 !Op0.getNode()->hasOneUse())
15782 break;
15783 SDValue NewSplitF64 =
15784 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
15785 Op0.getOperand(0));
15786 SDValue Lo = NewSplitF64.getValue(0);
15787 SDValue Hi = NewSplitF64.getValue(1);
15788 APInt SignBit = APInt::getSignMask(32);
15789 if (Op0.getOpcode() == ISD::FNEG) {
15790 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
15791 DAG.getConstant(SignBit, DL, MVT::i32));
15792 return DCI.CombineTo(N, Lo, NewHi);
15793 }
15794 assert(Op0.getOpcode() == ISD::FABS);
15795 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
15796 DAG.getConstant(~SignBit, DL, MVT::i32));
15797 return DCI.CombineTo(N, Lo, NewHi);
15798 }
15799 case RISCVISD::SLLW:
15800 case RISCVISD::SRAW:
15801 case RISCVISD::SRLW:
15802 case RISCVISD::RORW:
15803 case RISCVISD::ROLW: {
15804 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
15805 if (SimplifyDemandedLowBitsHelper(0, 32) ||
15806 SimplifyDemandedLowBitsHelper(1, 5))
15807 return SDValue(N, 0);
15808
15809 break;
15810 }
15811 case RISCVISD::CLZW:
15812 case RISCVISD::CTZW: {
15813 // Only the lower 32 bits of the first operand are read
15814 if (SimplifyDemandedLowBitsHelper(0, 32))
15815 return SDValue(N, 0);
15816 break;
15817 }
15818 case RISCVISD::FMV_W_X_RV64: {
15819 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
15820 // conversion is unnecessary and can be replaced with the
15821 // FMV_X_ANYEXTW_RV64 operand.
15822 SDValue Op0 = N->getOperand(0);
15823 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
15824 return Op0.getOperand(0);
15825 break;
15826 }
15827 case RISCVISD::FMV_X_ANYEXTH:
15828 case RISCVISD::FMV_X_ANYEXTW_RV64: {
15829 SDLoc DL(N);
15830 SDValue Op0 = N->getOperand(0);
15831 MVT VT = N->getSimpleValueType(0);
15832 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
15833 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
15834 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
15835 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
15836 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
15837 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
15838 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
15839 assert(Op0.getOperand(0).getValueType() == VT &&
15840 "Unexpected value type!");
15841 return Op0.getOperand(0);
15842 }
15843
15844 // This is a target-specific version of a DAGCombine performed in
15845 // DAGCombiner::visitBITCAST. It performs the equivalent of:
15846 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15847 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15848 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
15849 !Op0.getNode()->hasOneUse())
15850 break;
15851 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
15852 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
15853 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
15854 if (Op0.getOpcode() == ISD::FNEG)
15855 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
15856 DAG.getConstant(SignBit, DL, VT));
15857
15858 assert(Op0.getOpcode() == ISD::FABS);
15859 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
15860 DAG.getConstant(~SignBit, DL, VT));
15861 }
15862 case ISD::ABS: {
15863 EVT VT = N->getValueType(0);
15864 SDValue N0 = N->getOperand(0);
15865 // abs (sext) -> zext (abs)
15866 // abs (zext) -> zext (handled elsewhere)
15867 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
15868 SDValue Src = N0.getOperand(0);
15869 SDLoc DL(N);
15870 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
15871 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
15872 }
15873 break;
15874 }
15875 case ISD::ADD: {
15876 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
15877 return V;
15878 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
15879 return V;
15880 return performADDCombine(N, DAG, Subtarget);
15881 }
15882 case ISD::SUB: {
15883 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
15884 return V;
15885 return performSUBCombine(N, DAG, Subtarget);
15886 }
15887 case ISD::AND:
15888 return performANDCombine(N, DCI, Subtarget);
15889 case ISD::OR:
15890 return performORCombine(N, DCI, Subtarget);
15891 case ISD::XOR:
15892 return performXORCombine(N, DAG, Subtarget);
15893 case ISD::MUL:
15894 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
15895 return V;
15896 return performMULCombine(N, DAG);
15897 case ISD::FADD:
15898 case ISD::UMAX:
15899 case ISD::UMIN:
15900 case ISD::SMAX:
15901 case ISD::SMIN:
15902 case ISD::FMAXNUM:
15903 case ISD::FMINNUM: {
15904 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15905 return V;
15906 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15907 return V;
15908 return SDValue();
15909 }
15910 case ISD::SETCC:
15911 return performSETCCCombine(N, DAG, Subtarget);
15912 case ISD::SIGN_EXTEND_INREG:
15913 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
15914 case ISD::ZERO_EXTEND:
15915 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
15916 // type legalization. This is safe because fp_to_uint produces poison if
15917 // it overflows.
15918 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
15919 SDValue Src = N->getOperand(0);
15920 if (Src.getOpcode() == ISD::FP_TO_UINT &&
15921 isTypeLegal(Src.getOperand(0).getValueType()))
15922 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
15923 Src.getOperand(0));
15924 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
15925 isTypeLegal(Src.getOperand(1).getValueType())) {
15926 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
15927 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
15928 Src.getOperand(0), Src.getOperand(1));
15929 DCI.CombineTo(N, Res);
15930 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
15931 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
15932 return SDValue(N, 0); // Return N so it doesn't get rechecked.
15933 }
15934 }
15935 return SDValue();
15936 case RISCVISD::TRUNCATE_VECTOR_VL: {
15937 // trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
15938 // This benefits cases where X and Y are both vectors of the same
15939 // low-precision value type. Since the truncate would be lowered into
15940 // n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
15941 // restriction, such a pattern would be expanded into a series of
15942 // "vsetvli" and "vnsrl" instructions later to reach this point.
15943 auto IsTruncNode = [](SDValue V) {
15944 if (V.getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
15945 return false;
15946 SDValue VL = V.getOperand(2);
15947 auto *C = dyn_cast<ConstantSDNode>(VL);
15948 // Assume all TRUNCATE_VECTOR_VL nodes use VLMAX for VMSET_VL operand
15949 bool IsVLMAXForVMSET = (C && C->isAllOnes()) ||
15950 (isa<RegisterSDNode>(VL) &&
15951 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
15952 return V.getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
15953 IsVLMAXForVMSET;
15954 };
15955
15956 SDValue Op = N->getOperand(0);
15957
15958 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
15959 // to distinguish such pattern.
15960 while (IsTruncNode(Op)) {
15961 if (!Op.hasOneUse())
15962 return SDValue();
15963 Op = Op.getOperand(0);
15964 }
15965
15966 if (Op.getOpcode() == ISD::SRA && Op.hasOneUse()) {
15967 SDValue N0 = Op.getOperand(0);
15968 SDValue N1 = Op.getOperand(1);
15969 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
15970 N1.getOpcode() == ISD::ZERO_EXTEND && N1.hasOneUse()) {
15971 SDValue N00 = N0.getOperand(0);
15972 SDValue N10 = N1.getOperand(0);
15973 if (N00.getValueType().isVector() &&
15974 N00.getValueType() == N10.getValueType() &&
15975 N->getValueType(0) == N10.getValueType()) {
15976 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
15977 SDValue SMin = DAG.getNode(
15978 ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
15979 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
15980 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
15981 }
15982 }
15983 }
15984 break;
15985 }
15986 case ISD::TRUNCATE:
15987 return performTRUNCATECombine(N, DAG, Subtarget);
15988 case ISD::SELECT:
15989 return performSELECTCombine(N, DAG, Subtarget);
15990 case RISCVISD::CZERO_EQZ:
15991 case RISCVISD::CZERO_NEZ:
15992 // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
15993 // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
15994 if (N->getOperand(1).getOpcode() == ISD::XOR &&
15995 isOneConstant(N->getOperand(1).getOperand(1))) {
15996 SDValue Cond = N->getOperand(1).getOperand(0);
15997 APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
15998 if (DAG.MaskedValueIsZero(Cond, Mask)) {
15999 unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
16000 ? RISCVISD::CZERO_NEZ
16001 : RISCVISD::CZERO_EQZ;
16002 return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
16003 N->getOperand(0), Cond);
16004 }
16005 }
16006 return SDValue();
16007
16008 case RISCVISD::SELECT_CC: {
16009 // Transform
16010 SDValue LHS = N->getOperand(0);
16011 SDValue RHS = N->getOperand(1);
16012 SDValue CC = N->getOperand(2);
16013 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16014 SDValue TrueV = N->getOperand(3);
16015 SDValue FalseV = N->getOperand(4);
16016 SDLoc DL(N);
16017 EVT VT = N->getValueType(0);
16018
16019 // If the True and False values are the same, we don't need a select_cc.
16020 if (TrueV == FalseV)
16021 return TrueV;
16022
16023 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
16024 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
16025 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
16026 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
16027 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
16028 if (CCVal == ISD::CondCode::SETGE)
16029 std::swap(TrueV, FalseV);
16030
16031 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
16032 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
16033 // Only handle simm12; if a constant is not in this range, it has to be
16034 // materialized in a register instead.
16035 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
16036 isInt<12>(TrueSImm - FalseSImm)) {
16037 SDValue SRA =
16038 DAG.getNode(ISD::SRA, DL, VT, LHS,
16039 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
16040 SDValue AND =
16041 DAG.getNode(ISD::AND, DL, VT, SRA,
16042 DAG.getConstant(TrueSImm - FalseSImm, DL, VT));
16043 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
16044 }
16045
16046 if (CCVal == ISD::CondCode::SETGE)
16047 std::swap(TrueV, FalseV);
16048 }
16049
16050 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16051 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
16052 {LHS, RHS, CC, TrueV, FalseV});
16053
16054 if (!Subtarget.hasConditionalMoveFusion()) {
16055 // (select c, -1, y) -> -c | y
16056 if (isAllOnesConstant(TrueV)) {
16057 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16058 SDValue Neg = DAG.getNegative(C, DL, VT);
16059 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
16060 }
16061 // (select c, y, -1) -> -!c | y
16062 if (isAllOnesConstant(FalseV)) {
16063 SDValue C =
16064 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16065 SDValue Neg = DAG.getNegative(C, DL, VT);
16066 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
16067 }
16068
16069 // (select c, 0, y) -> -!c & y
16070 if (isNullConstant(TrueV)) {
16071 SDValue C =
16072 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
16073 SDValue Neg = DAG.getNegative(C, DL, VT);
16074 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
16075 }
16076 // (select c, y, 0) -> -c & y
16077 if (isNullConstant(FalseV)) {
16078 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
16079 SDValue Neg = DAG.getNegative(C, DL, VT);
16080 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
16081 }
16082 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
16083 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
16084 if (((isOneConstant(FalseV) && LHS == TrueV &&
16085 CCVal == ISD::CondCode::SETNE) ||
16086 (isOneConstant(TrueV) && LHS == FalseV &&
16087 CCVal == ISD::CondCode::SETEQ)) &&
16088 isNullConstant(RHS)) {
16089 // freeze it to be safe.
16090 LHS = DAG.getFreeze(LHS);
16091 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
16092 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
16093 }
16094 }
16095
16096 // If both true/false are an xor with 1, pull through the select.
16097 // This can occur after op legalization if both operands are setccs that
16098 // require an xor to invert.
16099 // FIXME: Generalize to other binary ops with identical operand?
16100 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
16101 TrueV.getOperand(1) == FalseV.getOperand(1) &&
16102 isOneConstant(TrueV.getOperand(1)) &&
16103 TrueV.hasOneUse() && FalseV.hasOneUse()) {
16104 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
16105 TrueV.getOperand(0), FalseV.getOperand(0));
16106 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
16107 }
16108
16109 return SDValue();
16110 }
16111 case RISCVISD::BR_CC: {
16112 SDValue LHS = N->getOperand(1);
16113 SDValue RHS = N->getOperand(2);
16114 SDValue CC = N->getOperand(3);
16115 SDLoc DL(N);
16116
16117 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
16118 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
16119 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
16120
16121 return SDValue();
16122 }
16123 case ISD::BITREVERSE:
16124 return performBITREVERSECombine(N, DAG, Subtarget);
16125 case ISD::FP_TO_SINT:
16126 case ISD::FP_TO_UINT:
16127 return performFP_TO_INTCombine(N, DCI, Subtarget);
16128 case ISD::FP_TO_SINT_SAT:
16129 case ISD::FP_TO_UINT_SAT:
16130 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
16131 case ISD::FCOPYSIGN: {
16132 EVT VT = N->getValueType(0);
16133 if (!VT.isVector())
16134 break;
16135 // There is a form of VFSGNJ which injects the negated sign of its second
16136 // operand. Try and bubble any FNEG up after the extend/round to produce
16137 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
16138 // TRUNC=1.
16139 SDValue In2 = N->getOperand(1);
16140 // Avoid cases where the extend/round has multiple uses, as duplicating
16141 // those is typically more expensive than removing a fneg.
16142 if (!In2.hasOneUse())
16143 break;
16144 if (In2.getOpcode() != ISD::FP_EXTEND &&
16145 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
16146 break;
16147 In2 = In2.getOperand(0);
16148 if (In2.getOpcode() != ISD::FNEG)
16149 break;
16150 SDLoc DL(N);
16151 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
16152 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
16153 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
16154 }
16155 case ISD::MGATHER: {
16156 const auto *MGN = dyn_cast<MaskedGatherSDNode>(N);
16157 const EVT VT = N->getValueType(0);
16158 SDValue Index = MGN->getIndex();
16159 SDValue ScaleOp = MGN->getScale();
16160 ISD::MemIndexType IndexType = MGN->getIndexType();
16161 assert(!MGN->isIndexScaled() &&
16162 "Scaled gather/scatter should not be formed");
16163
16164 SDLoc DL(N);
16165 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16166 return DAG.getMaskedGather(
16167 N->getVTList(), MGN->getMemoryVT(), DL,
16168 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16169 MGN->getBasePtr(), Index, ScaleOp},
16170 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16171
16172 if (narrowIndex(Index, IndexType, DAG))
16173 return DAG.getMaskedGather(
16174 N->getVTList(), MGN->getMemoryVT(), DL,
16175 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
16176 MGN->getBasePtr(), Index, ScaleOp},
16177 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
16178
16179 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
16180 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
16181 // The sequence will be XLenVT, not the type of Index. Tell
16182 // isSimpleVIDSequence this so we avoid overflow.
16183 if (std::optional<VIDSequence> SimpleVID =
16184 isSimpleVIDSequence(Index, Subtarget.getXLen());
16185 SimpleVID && SimpleVID->StepDenominator == 1) {
16186 const int64_t StepNumerator = SimpleVID->StepNumerator;
16187 const int64_t Addend = SimpleVID->Addend;
16188
16189 // Note: We don't need to check alignment here since (by assumption
16190 // from the existence of the gather), our offsets must be sufficiently
16191 // aligned.
16192
16193 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
16194 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
16195 assert(IndexType == ISD::UNSIGNED_SCALED);
16196 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
16197 DAG.getConstant(Addend, DL, PtrVT));
16198
16199 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
16200 SDValue IntID =
16201 DAG.getTargetConstant(Intrinsic::riscv_masked_strided_load, DL,
16202 XLenVT);
16203 SDValue Ops[] =
16204 {MGN->getChain(), IntID, MGN->getPassThru(), BasePtr,
16205 DAG.getConstant(StepNumerator, DL, XLenVT), MGN->getMask()};
16206 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
16207 Ops, VT, MGN->getMemOperand());
16208 }
16209 }
16210
16211 SmallVector<int> ShuffleMask;
16212 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16213 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
16214 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
16215 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
16216 MGN->getMask(), DAG.getUNDEF(VT),
16217 MGN->getMemoryVT(), MGN->getMemOperand(),
16218 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16219 SDValue Shuffle =
16220 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
16221 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
16222 }
16223
16224 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
16225 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
16226 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
16227 SmallVector<SDValue> NewIndices;
16228 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
16229 NewIndices.push_back(Index.getOperand(i));
16230 EVT IndexVT = Index.getValueType()
16231 .getHalfNumVectorElementsVT(*DAG.getContext());
16232 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
16233
16234 unsigned ElementSize = VT.getScalarStoreSize();
16235 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
16236 auto EltCnt = VT.getVectorElementCount();
16237 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
16238 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
16239 EltCnt.divideCoefficientBy(2));
16240 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
16241 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16242 EltCnt.divideCoefficientBy(2));
16243 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
16244
16245 SDValue Gather =
16246 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
16247 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
16248 Index, ScaleOp},
16249 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
16250 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
16251 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
16252 }
16253 break;
16254 }
16255 case ISD::MSCATTER:{
16256 const auto *MSN = dyn_cast<MaskedScatterSDNode>(N);
16257 SDValue Index = MSN->getIndex();
16258 SDValue ScaleOp = MSN->getScale();
16259 ISD::MemIndexType IndexType = MSN->getIndexType();
16260 assert(!MSN->isIndexScaled() &&
16261 "Scaled gather/scatter should not be formed");
16262
16263 SDLoc DL(N);
16264 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16265 return DAG.getMaskedScatter(
16266 N->getVTList(), MSN->getMemoryVT(), DL,
16267 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16268 Index, ScaleOp},
16269 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16270
16271 if (narrowIndex(Index, IndexType, DAG))
16272 return DAG.getMaskedScatter(
16273 N->getVTList(), MSN->getMemoryVT(), DL,
16274 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
16275 Index, ScaleOp},
16276 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
16277
16278 EVT VT = MSN->getValue()->getValueType(0);
16279 SmallVector<int> ShuffleMask;
16280 if (!MSN->isTruncatingStore() &&
16281 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
16282 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
16283 DAG.getUNDEF(VT), ShuffleMask);
16284 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
16285 DAG.getUNDEF(XLenVT), MSN->getMask(),
16286 MSN->getMemoryVT(), MSN->getMemOperand(),
16287 ISD::UNINDEXED, false);
16288 }
16289 break;
16290 }
16291 case ISD::VP_GATHER: {
16292 const auto *VPGN = dyn_cast<VPGatherSDNode>(N);
16293 SDValue Index = VPGN->getIndex();
16294 SDValue ScaleOp = VPGN->getScale();
16295 ISD::MemIndexType IndexType = VPGN->getIndexType();
16296 assert(!VPGN->isIndexScaled() &&
16297 "Scaled gather/scatter should not be formed");
16298
16299 SDLoc DL(N);
16300 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16301 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16302 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16303 ScaleOp, VPGN->getMask(),
16304 VPGN->getVectorLength()},
16305 VPGN->getMemOperand(), IndexType);
16306
16307 if (narrowIndex(Index, IndexType, DAG))
16308 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
16309 {VPGN->getChain(), VPGN->getBasePtr(), Index,
16310 ScaleOp, VPGN->getMask(),
16311 VPGN->getVectorLength()},
16312 VPGN->getMemOperand(), IndexType);
16313
16314 break;
16315 }
16316 case ISD::VP_SCATTER: {
16317 const auto *VPSN = dyn_cast<VPScatterSDNode>(N);
16318 SDValue Index = VPSN->getIndex();
16319 SDValue ScaleOp = VPSN->getScale();
16320 ISD::MemIndexType IndexType = VPSN->getIndexType();
16321 assert(!VPSN->isIndexScaled() &&
16322 "Scaled gather/scatter should not be formed");
16323
16324 SDLoc DL(N);
16325 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
16326 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16327 {VPSN->getChain(), VPSN->getValue(),
16328 VPSN->getBasePtr(), Index, ScaleOp,
16329 VPSN->getMask(), VPSN->getVectorLength()},
16330 VPSN->getMemOperand(), IndexType);
16331
16332 if (narrowIndex(Index, IndexType, DAG))
16333 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
16334 {VPSN->getChain(), VPSN->getValue(),
16335 VPSN->getBasePtr(), Index, ScaleOp,
16336 VPSN->getMask(), VPSN->getVectorLength()},
16337 VPSN->getMemOperand(), IndexType);
16338 break;
16339 }
16340 case RISCVISD::SRA_VL:
16341 case RISCVISD::SRL_VL:
16342 case RISCVISD::SHL_VL: {
16343 SDValue ShAmt = N->getOperand(1);
16344 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16345 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16346 SDLoc DL(N);
16347 SDValue VL = N->getOperand(4);
16348 EVT VT = N->getValueType(0);
16349 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16350 ShAmt.getOperand(1), VL);
16351 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
16352 N->getOperand(2), N->getOperand(3), N->getOperand(4));
16353 }
16354 break;
16355 }
16356 case ISD::SRA:
16357 if (SDValue V = performSRACombine(N, DAG, Subtarget))
16358 return V;
16359 [[fallthrough]];
16360 case ISD::SRL:
16361 case ISD::SHL: {
16362 SDValue ShAmt = N->getOperand(1);
16363 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
16364 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
16365 SDLoc DL(N);
16366 EVT VT = N->getValueType(0);
16367 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
16368 ShAmt.getOperand(1),
16369 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
16370 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
16371 }
16372 break;
16373 }
16374 case RISCVISD::ADD_VL:
16375 if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
16376 return V;
16377 return combineToVWMACC(N, DAG, Subtarget);
16378 case RISCVISD::VWADD_W_VL:
16379 case RISCVISD::VWADDU_W_VL:
16380 case RISCVISD::VWSUB_W_VL:
16381 case RISCVISD::VWSUBU_W_VL:
16382 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
16383 case RISCVISD::SUB_VL:
16384 case RISCVISD::MUL_VL:
16385 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16386 case RISCVISD::VFMADD_VL:
16387 case RISCVISD::VFNMADD_VL:
16388 case RISCVISD::VFMSUB_VL:
16389 case RISCVISD::VFNMSUB_VL:
16390 case RISCVISD::STRICT_VFMADD_VL:
16391 case RISCVISD::STRICT_VFNMADD_VL:
16392 case RISCVISD::STRICT_VFMSUB_VL:
16393 case RISCVISD::STRICT_VFNMSUB_VL:
16394 return performVFMADD_VLCombine(N, DAG, Subtarget);
16395 case RISCVISD::FADD_VL:
16396 case RISCVISD::FSUB_VL:
16397 case RISCVISD::FMUL_VL:
16398 case RISCVISD::VFWADD_W_VL:
16399 case RISCVISD::VFWSUB_W_VL: {
16400 if (N->getValueType(0).isScalableVector() &&
16401 N->getValueType(0).getVectorElementType() == MVT::f32 &&
16402 (Subtarget.hasVInstructionsF16Minimal() &&
16403 !Subtarget.hasVInstructionsF16()))
16404 return SDValue();
16405 return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
16406 }
16407 case ISD::LOAD:
16408 case ISD::STORE: {
16409 if (DCI.isAfterLegalizeDAG())
16410 if (SDValue V = performMemPairCombine(N, DCI))
16411 return V;
16412
16413 if (N->getOpcode() != ISD::STORE)
16414 break;
16415
16416 auto *Store = cast<StoreSDNode>(N);
16417 SDValue Chain = Store->getChain();
16418 EVT MemVT = Store->getMemoryVT();
16419 SDValue Val = Store->getValue();
16420 SDLoc DL(N);
16421
16422 bool IsScalarizable =
16423 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
16424 Store->isSimple() &&
16425 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
16426 isPowerOf2_64(MemVT.getSizeInBits()) &&
16427 MemVT.getSizeInBits() <= Subtarget.getXLen();
16428
16429 // If sufficiently aligned we can scalarize stores of constant vectors of
16430 // any power-of-two size up to XLen bits, provided that they aren't too
16431 // expensive to materialize.
16432 // vsetivli zero, 2, e8, m1, ta, ma
16433 // vmv.v.i v8, 4
16434 // vse64.v v8, (a0)
16435 // ->
16436 // li a1, 1028
16437 // sh a1, 0(a0)
16438 if (DCI.isBeforeLegalize() && IsScalarizable &&
16439 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
16440 // Get the constant vector bits
16441 APInt NewC(Val.getValueSizeInBits(), 0);
16442 uint64_t EltSize = Val.getScalarValueSizeInBits();
16443 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
16444 if (Val.getOperand(i).isUndef())
16445 continue;
16446 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
16447 i * EltSize);
16448 }
16449 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16450
16451 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
16452 true) <= 2 &&
16453 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16454 NewVT, *Store->getMemOperand())) {
16455 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
16456 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
16457 Store->getPointerInfo(), Store->getOriginalAlign(),
16458 Store->getMemOperand()->getFlags());
16459 }
16460 }
16461
16462 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
16463 // vsetivli zero, 2, e16, m1, ta, ma
16464 // vle16.v v8, (a0)
16465 // vse16.v v8, (a1)
16466 if (auto *L = dyn_cast<LoadSDNode>(Val);
16467 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
16468 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
16469 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
16470 L->getMemoryVT() == MemVT) {
16471 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
16472 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16473 NewVT, *Store->getMemOperand()) &&
16474 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
16475 NewVT, *L->getMemOperand())) {
16476 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
16477 L->getPointerInfo(), L->getOriginalAlign(),
16478 L->getMemOperand()->getFlags());
16479 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
16480 Store->getPointerInfo(), Store->getOriginalAlign(),
16481 Store->getMemOperand()->getFlags());
16482 }
16483 }
16484
16485 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
16486 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
16487 // any illegal types.
16488 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
16489 (DCI.isAfterLegalizeDAG() &&
16490 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16491 isNullConstant(Val.getOperand(1)))) {
16492 SDValue Src = Val.getOperand(0);
16493 MVT VecVT = Src.getSimpleValueType();
16494 // VecVT should be scalable and memory VT should match the element type.
16495 if (!Store->isIndexed() && VecVT.isScalableVector() &&
16496 MemVT == VecVT.getVectorElementType()) {
16497 SDLoc DL(N);
16498 MVT MaskVT = getMaskTypeFor(VecVT);
16499 return DAG.getStoreVP(
16500 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
16501 DAG.getConstant(1, DL, MaskVT),
16502 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
16503 Store->getMemOperand(), Store->getAddressingMode(),
16504 Store->isTruncatingStore(), /*IsCompress*/ false);
16505 }
16506 }
16507
16508 break;
16509 }
16510 case ISD::SPLAT_VECTOR: {
16511 EVT VT = N->getValueType(0);
16512 // Only perform this combine on legal MVT types.
16513 if (!isTypeLegal(VT))
16514 break;
16515 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
16516 DAG, Subtarget))
16517 return Gather;
16518 break;
16519 }
16520 case ISD::BUILD_VECTOR:
16521 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
16522 return V;
16523 break;
16524 case ISD::CONCAT_VECTORS:
16525 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
16526 return V;
16527 break;
16528 case ISD::INSERT_VECTOR_ELT:
16529 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
16530 return V;
16531 break;
16532 case RISCVISD::VFMV_V_F_VL: {
16533 const MVT VT = N->getSimpleValueType(0);
16534 SDValue Passthru = N->getOperand(0);
16535 SDValue Scalar = N->getOperand(1);
16536 SDValue VL = N->getOperand(2);
16537
16538 // If VL is 1, we can use vfmv.s.f.
16539 if (isOneConstant(VL))
16540 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
16541 break;
16542 }
16543 case RISCVISD::VMV_V_X_VL: {
16544 const MVT VT = N->getSimpleValueType(0);
16545 SDValue Passthru = N->getOperand(0);
16546 SDValue Scalar = N->getOperand(1);
16547 SDValue VL = N->getOperand(2);
16548
16549 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
16550 // scalar input.
16551 unsigned ScalarSize = Scalar.getValueSizeInBits();
16552 unsigned EltWidth = VT.getScalarSizeInBits();
16553 if (ScalarSize > EltWidth && Passthru.isUndef())
16554 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
16555 return SDValue(N, 0);
16556
16557 // If VL is 1 and the scalar value won't benefit from immediate, we can
16558 // use vmv.s.x.
16559 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16560 if (isOneConstant(VL) &&
16561 (!Const || Const->isZero() ||
16562 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
16563 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
16564
16565 break;
16566 }
16567 case RISCVISD::VFMV_S_F_VL: {
16568 SDValue Src = N->getOperand(1);
16569 // Try to remove vector->scalar->vector if the scalar->vector is inserting
16570 // into an undef vector.
16571 // TODO: Could use a vslide or vmv.v.v for non-undef.
16572 if (N->getOperand(0).isUndef() &&
16573 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16574 isNullConstant(Src.getOperand(1)) &&
16575 Src.getOperand(0).getValueType().isScalableVector()) {
16576 EVT VT = N->getValueType(0);
16577 EVT SrcVT = Src.getOperand(0).getValueType();
16578 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
16579 // Widths match, just return the original vector.
16580 if (SrcVT == VT)
16581 return Src.getOperand(0);
16582 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
16583 }
16584 [[fallthrough]];
16585 }
16586 case RISCVISD::VMV_S_X_VL: {
16587 const MVT VT = N->getSimpleValueType(0);
16588 SDValue Passthru = N->getOperand(0);
16589 SDValue Scalar = N->getOperand(1);
16590 SDValue VL = N->getOperand(2);
16591
16592 // Use M1 or smaller to avoid over constraining register allocation
16593 const MVT M1VT = getLMUL1VT(VT);
16594 if (M1VT.bitsLT(VT)) {
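// Perform the scalar insert on an LMUL1 slice of the vector, then splice the
// slice back into the full-width passthru value.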
16595 SDValue M1Passthru =
16596 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
16597 DAG.getVectorIdxConstant(0, DL));
16598 SDValue Result =
16599 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
16600 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
16601 DAG.getVectorIdxConstant(0, DL));
16602 return Result;
16603 }
16604
16605 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
16606 // higher would involve overly constraining the register allocator for
16607 // no purpose.
16608 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
16609 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
16610 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
16611 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
16612
16613 break;
16614 }
16615 case RISCVISD::VMV_X_S: {
16616 SDValue Vec = N->getOperand(0);
16617 MVT VecVT = N->getOperand(0).getSimpleValueType();
16618 const MVT M1VT = getLMUL1VT(VecVT);
16619 if (M1VT.bitsLT(VecVT)) {
16620 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
16621 DAG.getVectorIdxConstant(0, DL));
16622 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
16623 }
16624 break;
16625 }
16626 case ISD::INTRINSIC_VOID:
16627 case ISD::INTRINSIC_W_CHAIN:
16628 case ISD::INTRINSIC_WO_CHAIN: {
16629 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
16630 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
16631 switch (IntNo) {
16632 // By default we do not combine any intrinsic.
16633 default:
16634 return SDValue();
16635 case Intrinsic::riscv_masked_strided_load: {
16636 MVT VT = N->getSimpleValueType(0);
16637 auto *Load = cast<MemIntrinsicSDNode>(N);
16638 SDValue PassThru = N->getOperand(2);
16639 SDValue Base = N->getOperand(3);
16640 SDValue Stride = N->getOperand(4);
16641 SDValue Mask = N->getOperand(5);
16642
16643 // If the stride is equal to the element size in bytes, we can use
16644 // a masked.load.
16645 const unsigned ElementSize = VT.getScalarStoreSize();
16646 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16647 StrideC && StrideC->getZExtValue() == ElementSize)
16648 return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
16649 DAG.getUNDEF(XLenVT), Mask, PassThru,
16650 Load->getMemoryVT(), Load->getMemOperand(),
16651 ISD::UNINDEXED, ISD::NON_EXTLOAD);
16652 return SDValue();
16653 }
16654 case Intrinsic::riscv_masked_strided_store: {
16655 auto *Store = cast<MemIntrinsicSDNode>(N);
16656 SDValue Value = N->getOperand(2);
16657 SDValue Base = N->getOperand(3);
16658 SDValue Stride = N->getOperand(4);
16659 SDValue Mask = N->getOperand(5);
16660
16661 // If the stride is equal to the element size in bytes, we can use
16662 // a masked.store.
16663 const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
16664 if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
16665 StrideC && StrideC->getZExtValue() == ElementSize)
16666 return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
16667 DAG.getUNDEF(XLenVT), Mask,
16668 Store->getMemoryVT(), Store->getMemOperand(),
16669 ISD::UNINDEXED, false);
16670 return SDValue();
16671 }
16672 case Intrinsic::riscv_vcpop:
16673 case Intrinsic::riscv_vcpop_mask:
16674 case Intrinsic::riscv_vfirst:
16675 case Intrinsic::riscv_vfirst_mask: {
16676 SDValue VL = N->getOperand(2);
16677 if (IntNo == Intrinsic::riscv_vcpop_mask ||
16678 IntNo == Intrinsic::riscv_vfirst_mask)
16679 VL = N->getOperand(3);
16680 if (!isNullConstant(VL))
16681 return SDValue();
16682 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
16683 SDLoc DL(N);
16684 EVT VT = N->getValueType(0);
16685 if (IntNo == Intrinsic::riscv_vfirst ||
16686 IntNo == Intrinsic::riscv_vfirst_mask)
16687 return DAG.getConstant(-1, DL, VT);
16688 return DAG.getConstant(0, DL, VT);
16689 }
16690 }
16691 }
16692 case ISD::BITCAST: {
16693 assert(Subtarget.useRVVForFixedLengthVectors());
16694 SDValue N0 = N->getOperand(0);
16695 EVT VT = N->getValueType(0);
16696 EVT SrcVT = N0.getValueType();
16697 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
16698 // type, widen both sides to avoid a trip through memory.
16699 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
16700 VT.isScalarInteger()) {
16701 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
16702 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
16703 Ops[0] = N0;
16704 SDLoc DL(N);
16705 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
16706 N0 = DAG.getBitcast(MVT::i8, N0);
16707 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
16708 }
16709
16710 return SDValue();
16711 }
16712 }
16713
16714 return SDValue();
16715}
16716
16717 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
16718 EVT XVT, unsigned KeptBits) const {
16719 // For vectors, we don't have a preference.
16720 if (XVT.isVector())
16721 return false;
16722
16723 if (XVT != MVT::i32 && XVT != MVT::i64)
16724 return false;
16725
16726 // We can use sext.w for RV64 or an srai 31 on RV32.
16727 if (KeptBits == 32 || KeptBits == 64)
16728 return true;
16729
16730 // With Zbb we can use sext.h/sext.b.
16731 return Subtarget.hasStdExtZbb() &&
16732 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
16733 KeptBits == 16);
16734}
16735
16736 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
16737 const SDNode *N, CombineLevel Level) const {
16738 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
16739 N->getOpcode() == ISD::SRL) &&
16740 "Expected shift op");
16741
16742 // The following folds are only desirable if `(OP _, c1 << c2)` can be
16743 // materialised in fewer instructions than `(OP _, c1)`:
16744 //
16745 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
16746 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
16747 SDValue N0 = N->getOperand(0);
16748 EVT Ty = N0.getValueType();
16749 if (Ty.isScalarInteger() &&
16750 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
16751 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
16752 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16753 if (C1 && C2) {
16754 const APInt &C1Int = C1->getAPIntValue();
16755 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
16756
16757 // We can materialise `c1 << c2` into an add immediate, so it's "free",
16758 // and the combine should happen, to potentially allow further combines
16759 // later.
16760 if (ShiftedC1Int.getSignificantBits() <= 64 &&
16761 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
16762 return true;
16763
16764 // We can materialise `c1` in an add immediate, so it's "free", and the
16765 // combine should be prevented.
16766 if (C1Int.getSignificantBits() <= 64 &&
16767 isLegalAddImmediate(C1Int.getSExtValue()))
16768 return false;
16769
16770 // Neither constant will fit into an immediate, so find materialisation
16771 // costs.
16772 int C1Cost =
16773 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
16774 /*CompressionCost*/ true);
16775 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
16776 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
16777 /*CompressionCost*/ true);
16778
16779 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
16780 // combine should be prevented.
16781 if (C1Cost < ShiftedC1Cost)
16782 return false;
16783 }
16784 }
16785 return true;
16786}
16787
16788 bool RISCVTargetLowering::targetShrinkDemandedConstant(
16789 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
16790 TargetLoweringOpt &TLO) const {
16791 // Delay this optimization as late as possible.
16792 if (!TLO.LegalOps)
16793 return false;
16794
16795 EVT VT = Op.getValueType();
16796 if (VT.isVector())
16797 return false;
16798
16799 unsigned Opcode = Op.getOpcode();
16800 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
16801 return false;
16802
16803 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
16804 if (!C)
16805 return false;
16806
16807 const APInt &Mask = C->getAPIntValue();
16808
16809 // Clear all non-demanded bits initially.
16810 APInt ShrunkMask = Mask & DemandedBits;
16811
16812 // Try to make a smaller immediate by setting undemanded bits.
16813
16814 APInt ExpandedMask = Mask | ~DemandedBits;
16815
16816 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
16817 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
16818 };
16819 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
16820 if (NewMask == Mask)
16821 return true;
16822 SDLoc DL(Op);
16823 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
16824 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
16825 Op.getOperand(0), NewC);
16826 return TLO.CombineTo(Op, NewOp);
16827 };
16828
16829 // If the shrunk mask fits in sign extended 12 bits, let the target
16830 // independent code apply it.
16831 if (ShrunkMask.isSignedIntN(12))
16832 return false;
16833
16834 // And has a few special cases for zext.
16835 if (Opcode == ISD::AND) {
16836 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
16837 // otherwise use SLLI + SRLI.
16838 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
16839 if (IsLegalMask(NewMask))
16840 return UseMask(NewMask);
16841
16842 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
16843 if (VT == MVT::i64) {
16844 APInt NewMask = APInt(64, 0xffffffff);
16845 if (IsLegalMask(NewMask))
16846 return UseMask(NewMask);
16847 }
16848 }
16849
16850 // For the remaining optimizations, we need to be able to make a negative
16851 // number through a combination of mask and undemanded bits.
16852 if (!ExpandedMask.isNegative())
16853 return false;
16854
16855 // What is the fewest number of bits we need to represent the negative number?
16856 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
16857
16858 // Try to make a 12 bit negative immediate. If that fails try to make a 32
16859 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
16860 // If we can't create a simm12, we shouldn't change opaque constants.
16861 APInt NewMask = ShrunkMask;
16862 if (MinSignedBits <= 12)
16863 NewMask.setBitsFrom(11);
16864 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
16865 NewMask.setBitsFrom(31);
16866 else
16867 return false;
16868
16869 // Check that our new mask is a subset of the demanded mask.
16870 assert(IsLegalMask(NewMask));
16871 return UseMask(NewMask);
16872}
16873
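// Constant-fold one application of the generalized bit-reverse/or-combine
// network; used below to model known bits for BREV8 and ORC_B.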
16874static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
16875 static const uint64_t GREVMasks[] = {
16876 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
16877 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
16878
16879 for (unsigned Stage = 0; Stage != 6; ++Stage) {
16880 unsigned Shift = 1 << Stage;
16881 if (ShAmt & Shift) {
16882 uint64_t Mask = GREVMasks[Stage];
16883 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
16884 if (IsGORC)
16885 Res |= x;
16886 x = Res;
16887 }
16888 }
16889
16890 return x;
16891}
16892
16893 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16894 KnownBits &Known,
16895 const APInt &DemandedElts,
16896 const SelectionDAG &DAG,
16897 unsigned Depth) const {
16898 unsigned BitWidth = Known.getBitWidth();
16899 unsigned Opc = Op.getOpcode();
16900 assert((Opc >= ISD::BUILTIN_OP_END ||
16901 Opc == ISD::INTRINSIC_WO_CHAIN ||
16902 Opc == ISD::INTRINSIC_W_CHAIN ||
16903 Opc == ISD::INTRINSIC_VOID) &&
16904 "Should use MaskedValueIsZero if you don't know whether Op"
16905 " is a target node!");
16906
16907 Known.resetAll();
16908 switch (Opc) {
16909 default: break;
16910 case RISCVISD::SELECT_CC: {
16911 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
16912 // If we don't know any bits, early out.
16913 if (Known.isUnknown())
16914 break;
16915 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
16916
16917 // Only known if known in both the LHS and RHS.
16918 Known = Known.intersectWith(Known2);
16919 break;
16920 }
16921 case RISCVISD::CZERO_EQZ:
16922 case RISCVISD::CZERO_NEZ:
16923 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
16924 // Result is either all zero or operand 0. We can propagate zeros, but not
16925 // ones.
16926 Known.One.clearAllBits();
16927 break;
16928 case RISCVISD::REMUW: {
16929 KnownBits Known2;
16930 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
16931 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
16932 // We only care about the lower 32 bits.
16933 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
16934 // Restore the original width by sign extending.
16935 Known = Known.sext(BitWidth);
16936 break;
16937 }
16938 case RISCVISD::DIVUW: {
16939 KnownBits Known2;
16940 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
16941 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
16942 // We only care about the lower 32 bits.
16943 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
16944 // Restore the original width by sign extending.
16945 Known = Known.sext(BitWidth);
16946 break;
16947 }
16948 case RISCVISD::SLLW: {
16949 KnownBits Known2;
16950 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
16951 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
16952 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
16953 // Restore the original width by sign extending.
16954 Known = Known.sext(BitWidth);
16955 break;
16956 }
16957 case RISCVISD::CTZW: {
16958 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
16959 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
16960 unsigned LowBits = llvm::bit_width(PossibleTZ);
16961 Known.Zero.setBitsFrom(LowBits);
16962 break;
16963 }
16964 case RISCVISD::CLZW: {
16965 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
16966 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
16967 unsigned LowBits = llvm::bit_width(PossibleLZ);
16968 Known.Zero.setBitsFrom(LowBits);
16969 break;
16970 }
16971 case RISCVISD::BREV8:
16972 case RISCVISD::ORC_B: {
16973 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
16974 // control value of 7 is equivalent to brev8 and orc.b.
16975 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
16976 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
16977 // To compute zeros, we need to invert the value and invert it back after.
16978 Known.Zero =
16979 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
16980 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
16981 break;
16982 }
16983 case RISCVISD::READ_VLENB: {
16984 // We can use the minimum and maximum VLEN values to bound VLENB. We
16985 // know VLEN must be a power of two.
16986 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
16987 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
16988 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
16989 Known.Zero.setLowBits(Log2_32(MinVLenB));
16990 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
16991 if (MaxVLenB == MinVLenB)
16992 Known.One.setBit(Log2_32(MinVLenB));
16993 break;
16994 }
16995 case RISCVISD::FCLASS: {
16996 // fclass will only set one of the low 10 bits.
16997 Known.Zero.setBitsFrom(10);
16998 break;
16999 }
17000 case ISD::INTRINSIC_W_CHAIN:
17001 case ISD::INTRINSIC_WO_CHAIN: {
17002 unsigned IntNo =
17003 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
17004 switch (IntNo) {
17005 default:
17006 // We can't do anything for most intrinsics.
17007 break;
17008 case Intrinsic::riscv_vsetvli:
17009 case Intrinsic::riscv_vsetvlimax: {
17010 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
17011 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
17012 RISCVII::VLMUL VLMUL =
17013 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
17014 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
17015 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
17016 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
17017 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
17018
17019 // The result of vsetvli must not be larger than AVL.
17020 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
17021 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
17022
17023 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
17024 if (BitWidth > KnownZeroFirstBit)
17025 Known.Zero.setBitsFrom(KnownZeroFirstBit);
17026 break;
17027 }
17028 }
17029 break;
17030 }
17031 }
17032}
17033
17034 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
17035 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17036 unsigned Depth) const {
17037 switch (Op.getOpcode()) {
17038 default:
17039 break;
17040 case RISCVISD::SELECT_CC: {
17041 unsigned Tmp =
17042 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
17043 if (Tmp == 1) return 1; // Early out.
17044 unsigned Tmp2 =
17045 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
17046 return std::min(Tmp, Tmp2);
17047 }
17048 case RISCVISD::CZERO_EQZ:
17049 case RISCVISD::CZERO_NEZ:
17050 // Output is either all zero or operand 0. We can propagate sign bit count
17051 // from operand 0.
17052 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17053 case RISCVISD::ABSW: {
17054 // We expand this at isel to negw+max. The result will have 33 sign bits
17055 // if the input has at least 33 sign bits.
17056 unsigned Tmp =
17057 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
17058 if (Tmp < 33) return 1;
17059 return 33;
17060 }
17061 case RISCVISD::SLLW:
17062 case RISCVISD::SRAW:
17063 case RISCVISD::SRLW:
17064 case RISCVISD::DIVW:
17065 case RISCVISD::DIVUW:
17066 case RISCVISD::REMUW:
17067 case RISCVISD::ROLW:
17068 case RISCVISD::RORW:
17069 case RISCVISD::FCVT_W_RV64:
17070 case RISCVISD::FCVT_WU_RV64:
17071 case RISCVISD::STRICT_FCVT_W_RV64:
17072 case RISCVISD::STRICT_FCVT_WU_RV64:
17073 // TODO: As the result is sign-extended, this is conservatively correct. A
17074 // more precise answer could be calculated for SRAW depending on known
17075 // bits in the shift amount.
17076 return 33;
17077 case RISCVISD::VMV_X_S: {
17078 // The number of sign bits of the scalar result is computed by obtaining the
17079 // element type of the input vector operand, subtracting its width from the
17080 // XLEN, and then adding one (sign bit within the element type). If the
17081 // element type is wider than XLen, the least-significant XLEN bits are
17082 // taken.
17083 unsigned XLen = Subtarget.getXLen();
17084 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
17085 if (EltBits <= XLen)
17086 return XLen - EltBits + 1;
17087 break;
17088 }
17089 case ISD::INTRINSIC_W_CHAIN: {
17090 unsigned IntNo = Op.getConstantOperandVal(1);
17091 switch (IntNo) {
17092 default:
17093 break;
17094 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
17095 case Intrinsic::riscv_masked_atomicrmw_add_i64:
17096 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
17097 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
17098 case Intrinsic::riscv_masked_atomicrmw_max_i64:
17099 case Intrinsic::riscv_masked_atomicrmw_min_i64:
17100 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
17101 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
17102 case Intrinsic::riscv_masked_cmpxchg_i64:
17103 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
17104 // narrow atomic operation. These are implemented using atomic
17105 // operations at the minimum supported atomicrmw/cmpxchg width whose
17106 // result is then sign extended to XLEN. With +A, the minimum width is
17107 // 32 for both RV64 and RV32.
17108 assert(Subtarget.getXLen() == 64);
17109 assert(getMinCmpXchgSizeInBits() == 32);
17110 assert(Subtarget.hasStdExtA());
17111 return 33;
17112 }
17113 break;
17114 }
17115 }
17116
17117 return 1;
17118}
17119
17120 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
17121 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
17122 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
17123
17124 // TODO: Add more target nodes.
17125 switch (Op.getOpcode()) {
17126 case RISCVISD::SELECT_CC:
17127 // Integer select_cc cannot create poison.
17128 // TODO: What are the FP poison semantics?
17129 // TODO: This instruction blocks poison from the unselected operand, can
17130 // we do anything with that?
17131 return !Op.getValueType().isInteger();
17132 }
17133 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
17134 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
17135}
17136
17137 const Constant *
17138 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
17139 assert(Ld && "Unexpected null LoadSDNode");
17140 if (!ISD::isNormalLoad(Ld))
17141 return nullptr;
17142
17143 SDValue Ptr = Ld->getBasePtr();
17144
17145 // Only constant pools with no offset are supported.
17146 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
17147 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
17148 if (!CNode || CNode->isMachineConstantPoolEntry() ||
17149 CNode->getOffset() != 0)
17150 return nullptr;
17151
17152 return CNode;
17153 };
17154
17155 // Simple case, LLA.
17156 if (Ptr.getOpcode() == RISCVISD::LLA) {
17157 auto *CNode = GetSupportedConstantPool(Ptr);
17158 if (!CNode || CNode->getTargetFlags() != 0)
17159 return nullptr;
17160
17161 return CNode->getConstVal();
17162 }
17163
17164 // Look for a HI and ADD_LO pair.
17165 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
17166 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
17167 return nullptr;
17168
17169 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
17170 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
17171
17172 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
17173 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
17174 return nullptr;
17175
17176 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
17177 return nullptr;
17178
17179 return CNodeLo->getConstVal();
17180}
17181
17182 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
17183 MachineBasicBlock *BB) {
17184 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
17185
17186 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
17187 // Should the count have wrapped while it was being read, we need to try
17188 // again.
17189 // For example:
17190 // ```
17191 // read:
17192 // csrrs x3, counterh # load high word of counter
17193 // csrrs x2, counter # load low word of counter
17194 // csrrs x4, counterh # load high word of counter
17195 // bne x3, x4, read # check if high word reads match, otherwise try again
17196 // ```
17197
17198 MachineFunction &MF = *BB->getParent();
17199 const BasicBlock *LLVMBB = BB->getBasicBlock();
17201 MachineFunction::iterator It = ++BB->getIterator();
17202 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
17203 MF.insert(It, LoopMBB);
17204
17205 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
17206 MF.insert(It, DoneMBB);
17207
17208 // Transfer the remainder of BB and its successor edges to DoneMBB.
17209 DoneMBB->splice(DoneMBB->begin(), BB,
17210 std::next(MachineBasicBlock::iterator(MI)), BB->end());
17212 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
17213 BB->addSuccessor(LoopMBB);
17214
17215 MachineRegisterInfo &RegInfo = MF.getRegInfo();
17216 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
17217 Register LoReg = MI.getOperand(0).getReg();
17218 Register HiReg = MI.getOperand(1).getReg();
17219 int64_t LoCounter = MI.getOperand(2).getImm();
17220 int64_t HiCounter = MI.getOperand(3).getImm();
17221 DebugLoc DL = MI.getDebugLoc();
17222
17223 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
17224 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
17225 .addImm(HiCounter)
17226 .addReg(RISCV::X0);
17227 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
17228 .addImm(LoCounter)
17229 .addReg(RISCV::X0);
17230 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
17231 .addImm(HiCounter)
17232 .addReg(RISCV::X0);
17233
17234 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
17235 .addReg(HiReg)
17236 .addReg(ReadAgainReg)
17237 .addMBB(LoopMBB);
17238
17239 LoopMBB->addSuccessor(LoopMBB);
17240 LoopMBB->addSuccessor(DoneMBB);
17241
17242 MI.eraseFromParent();
17243
17244 return DoneMBB;
17245}
17246
17247 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
17248 MachineBasicBlock *BB,
17249 const RISCVSubtarget &Subtarget) {
17250 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
17251
17252 MachineFunction &MF = *BB->getParent();
17253 DebugLoc DL = MI.getDebugLoc();
17254 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
17255 const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
17256 Register LoReg = MI.getOperand(0).getReg();
17257 Register HiReg = MI.getOperand(1).getReg();
17258 Register SrcReg = MI.getOperand(2).getReg();
17259
17260 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
17261 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17262
17263 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
17264 RI, Register());
17265 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17266 MachineMemOperand *MMOLo =
17267 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
17268 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17269 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
17270 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
17271 .addFrameIndex(FI)
17272 .addImm(0)
17273 .addMemOperand(MMOLo);
17274 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
17275 .addFrameIndex(FI)
17276 .addImm(4)
17277 .addMemOperand(MMOHi);
17278 MI.eraseFromParent(); // The pseudo instruction is gone now.
17279 return BB;
17280}
17281
17282 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
17283 MachineBasicBlock *BB,
17284 const RISCVSubtarget &Subtarget) {
17285 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
17286 "Unexpected instruction");
17287
17288 MachineFunction &MF = *BB->getParent();
17289 DebugLoc DL = MI.getDebugLoc();
17290 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
17291 const TargetRegisterInfo *RI = Subtarget.getRegisterInfo();
17292 Register DstReg = MI.getOperand(0).getReg();
17293 Register LoReg = MI.getOperand(1).getReg();
17294 Register HiReg = MI.getOperand(2).getReg();
17295
17296 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
17297 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
17298
17299 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
17300 MachineMemOperand *MMOLo =
17301 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
17302 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
17303 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
17304 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17305 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
17306 .addFrameIndex(FI)
17307 .addImm(0)
17308 .addMemOperand(MMOLo);
17309 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
17310 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
17311 .addFrameIndex(FI)
17312 .addImm(4)
17313 .addMemOperand(MMOHi);
17314 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
17315 MI.eraseFromParent(); // The pseudo instruction is gone now.
17316 return BB;
17317}
17318
17319 static bool isSelectPseudo(MachineInstr &MI) {
17320 switch (MI.getOpcode()) {
17321 default:
17322 return false;
17323 case RISCV::Select_GPR_Using_CC_GPR:
17324 case RISCV::Select_FPR16_Using_CC_GPR:
17325 case RISCV::Select_FPR16INX_Using_CC_GPR:
17326 case RISCV::Select_FPR32_Using_CC_GPR:
17327 case RISCV::Select_FPR32INX_Using_CC_GPR:
17328 case RISCV::Select_FPR64_Using_CC_GPR:
17329 case RISCV::Select_FPR64INX_Using_CC_GPR:
17330 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17331 return true;
17332 }
17333}
17334
17335 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
17336 unsigned RelOpcode, unsigned EqOpcode,
17337 const RISCVSubtarget &Subtarget) {
17338 DebugLoc DL = MI.getDebugLoc();
17339 Register DstReg = MI.getOperand(0).getReg();
17340 Register Src1Reg = MI.getOperand(1).getReg();
17341 Register Src2Reg = MI.getOperand(2).getReg();
17342 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
17343 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17345
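  // The expansion emitted below is roughly equivalent to the following
  // sequence (a sketch for PseudoQuietFLT_S; the register choices are
  // illustrative only):
  // ```
  // frflags t0             # save FFLAGS
  // flt.s   a0, fa0, fa1   # signaling compare; may spuriously raise NV
  // fsflags t0             # restore FFLAGS, discarding any spurious NV
  // feq.s   zero, fa0, fa1 # quiet compare; raises NV only for signaling NaNs
  // ```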
17346 // Save the current FFLAGS.
17347 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
17348
17349 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
17350 .addReg(Src1Reg)
17351 .addReg(Src2Reg);
17352 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17353 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17354
17355 // Restore the FFLAGS.
17356 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17357 .addReg(SavedFFlags, RegState::Kill);
17358
17359 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
17360 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
17361 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
17362 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
17363 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17364 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
17365
17366 // Erase the pseudoinstruction.
17367 MI.eraseFromParent();
17368 return BB;
17369}
17370
17371static MachineBasicBlock *
17372 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
17373 MachineBasicBlock *ThisMBB,
17374 const RISCVSubtarget &Subtarget) {
17375 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17376 // Without this, custom-inserter would have generated:
17377 //
17378 // A
17379 // | \
17380 // | B
17381 // | /
17382 // C
17383 // | \
17384 // | D
17385 // | /
17386 // E
17387 //
17388 // A: X = ...; Y = ...
17389 // B: empty
17390 // C: Z = PHI [X, A], [Y, B]
17391 // D: empty
17392 // E: PHI [X, C], [Z, D]
17393 //
17394 // If we lower both Select_FPRX_ in a single step, we can instead generate:
17395 //
17396 // A
17397 // | \
17398 // | C
17399 // | /|
17400 // |/ |
17401 // | |
17402 // | D
17403 // | /
17404 // E
17405 //
17406 // A: X = ...; Y = ...
17407 // D: empty
17408 // E: PHI [X, A], [X, C], [Y, D]
17409
17410 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17411 const DebugLoc &DL = First.getDebugLoc();
17412 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
17413 MachineFunction *F = ThisMBB->getParent();
17414 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
17415 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
17416 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
17417 MachineFunction::iterator It = ++ThisMBB->getIterator();
17418 F->insert(It, FirstMBB);
17419 F->insert(It, SecondMBB);
17420 F->insert(It, SinkMBB);
17421
17422 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
17423 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
17425 ThisMBB->end());
17426 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
17427
17428 // Fallthrough block for ThisMBB.
17429 ThisMBB->addSuccessor(FirstMBB);
17430 // Fallthrough block for FirstMBB.
17431 FirstMBB->addSuccessor(SecondMBB);
17432 ThisMBB->addSuccessor(SinkMBB);
17433 FirstMBB->addSuccessor(SinkMBB);
17434 // This is fallthrough.
17435 SecondMBB->addSuccessor(SinkMBB);
17436
17437 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
17438 Register FLHS = First.getOperand(1).getReg();
17439 Register FRHS = First.getOperand(2).getReg();
17440 // Insert appropriate branch.
17441 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
17442 .addReg(FLHS)
17443 .addReg(FRHS)
17444 .addMBB(SinkMBB);
17445
17446 Register SLHS = Second.getOperand(1).getReg();
17447 Register SRHS = Second.getOperand(2).getReg();
17448 Register Op1Reg4 = First.getOperand(4).getReg();
17449 Register Op1Reg5 = First.getOperand(5).getReg();
17450
17451 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
17452 // Insert appropriate branch.
17453 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
17454 .addReg(SLHS)
17455 .addReg(SRHS)
17456 .addMBB(SinkMBB);
17457
17458 Register DestReg = Second.getOperand(0).getReg();
17459 Register Op2Reg4 = Second.getOperand(4).getReg();
17460 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
17461 .addReg(Op2Reg4)
17462 .addMBB(ThisMBB)
17463 .addReg(Op1Reg4)
17464 .addMBB(FirstMBB)
17465 .addReg(Op1Reg5)
17466 .addMBB(SecondMBB);
17467
17468 // Now remove the Select_FPRX_s.
17469 First.eraseFromParent();
17470 Second.eraseFromParent();
17471 return SinkMBB;
17472}
17473
17474 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
17475 MachineBasicBlock *BB,
17476 const RISCVSubtarget &Subtarget) {
17477 // To "insert" Select_* instructions, we actually have to insert the triangle
17478 // control-flow pattern. The incoming instructions know the destination vreg
17479 // to set, the condition code register to branch on, the true/false values to
17480 // select between, and the condcode to use to select the appropriate branch.
17481 //
17482 // We produce the following control flow:
17483 // HeadMBB
17484 // | \
17485 // | IfFalseMBB
17486 // | /
17487 // TailMBB
17488 //
17489 // When we find a sequence of selects we attempt to optimize their emission
17490 // by sharing the control flow. Currently we only handle cases where we have
17491 // multiple selects with the exact same condition (same LHS, RHS and CC).
17492 // The selects may be interleaved with other instructions if the other
17493 // instructions meet some requirements we deem safe:
17494 // - They are not pseudo instructions.
17495 // - They are debug instructions. Otherwise,
17496 // - They do not have side-effects, do not access memory and their inputs do
17497 // not depend on the results of the select pseudo-instructions.
17498 // The TrueV/FalseV operands of the selects cannot depend on the result of
17499 // previous selects in the sequence.
17500 // These conditions could be further relaxed. See the X86 target for a
17501 // related approach and more information.
17502 //
17503 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
17504 // is checked here and handled by a separate function -
17505 // EmitLoweredCascadedSelect.
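  // As a sketch, a single non-cascaded
  //   Select_GPR_Using_CC_GPR dst, lhs, rhs, cc, truev, falsev
  // is expanded below into (names illustrative):
  //   HeadMBB:    branch-on-cc lhs, rhs, TailMBB
  //   IfFalseMBB: (falls through)
  //   TailMBB:    dst = PHI [truev, HeadMBB], [falsev, IfFalseMBB]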
17506 Register LHS = MI.getOperand(1).getReg();
17507 Register RHS = MI.getOperand(2).getReg();
17508 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
17509
17510 SmallVector<MachineInstr *, 4> SelectDebugValues;
17511 SmallSet<Register, 4> SelectDests;
17512 SelectDests.insert(MI.getOperand(0).getReg());
17513
17514 MachineInstr *LastSelectPseudo = &MI;
17515 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
17516 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
17517 Next->getOpcode() == MI.getOpcode() &&
17518 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
17519 Next->getOperand(5).isKill()) {
17520 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
17521 }
17522
17523 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
17524 SequenceMBBI != E; ++SequenceMBBI) {
17525 if (SequenceMBBI->isDebugInstr())
17526 continue;
17527 if (isSelectPseudo(*SequenceMBBI)) {
17528 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
17529 SequenceMBBI->getOperand(2).getReg() != RHS ||
17530 SequenceMBBI->getOperand(3).getImm() != CC ||
17531 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
17532 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
17533 break;
17534 LastSelectPseudo = &*SequenceMBBI;
17535 SequenceMBBI->collectDebugValues(SelectDebugValues);
17536 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
17537 continue;
17538 }
17539 if (SequenceMBBI->hasUnmodeledSideEffects() ||
17540 SequenceMBBI->mayLoadOrStore() ||
17541 SequenceMBBI->usesCustomInsertionHook())
17542 break;
17543 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
17544 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
17545 }))
17546 break;
17547 }
17548
17549 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17550 const BasicBlock *LLVM_BB = BB->getBasicBlock();
17551 DebugLoc DL = MI.getDebugLoc();
17552 MachineFunction::iterator I = ++BB->getIterator();
17553
17554 MachineBasicBlock *HeadMBB = BB;
17555 MachineFunction *F = BB->getParent();
17556 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
17557 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
17558
17559 F->insert(I, IfFalseMBB);
17560 F->insert(I, TailMBB);
17561
17562 // Transfer debug instructions associated with the selects to TailMBB.
17563 for (MachineInstr *DebugInstr : SelectDebugValues) {
17564 TailMBB->push_back(DebugInstr->removeFromParent());
17565 }
17566
17567 // Move all instructions after the sequence to TailMBB.
17568 TailMBB->splice(TailMBB->end(), HeadMBB,
17569 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
17570 // Update machine-CFG edges by transferring all successors of the current
17571 // block to the new block which will contain the Phi nodes for the selects.
17572 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
17573 // Set the successors for HeadMBB.
17574 HeadMBB->addSuccessor(IfFalseMBB);
17575 HeadMBB->addSuccessor(TailMBB);
17576
17577 // Insert appropriate branch.
17578 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
17579 .addReg(LHS)
17580 .addReg(RHS)
17581 .addMBB(TailMBB);
17582
17583 // IfFalseMBB just falls through to TailMBB.
17584 IfFalseMBB->addSuccessor(TailMBB);
17585
17586 // Create PHIs for all of the select pseudo-instructions.
17587 auto SelectMBBI = MI.getIterator();
17588 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
17589 auto InsertionPoint = TailMBB->begin();
17590 while (SelectMBBI != SelectEnd) {
17591 auto Next = std::next(SelectMBBI);
17592 if (isSelectPseudo(*SelectMBBI)) {
17593 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
17594 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
17595 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
17596 .addReg(SelectMBBI->getOperand(4).getReg())
17597 .addMBB(HeadMBB)
17598 .addReg(SelectMBBI->getOperand(5).getReg())
17599 .addMBB(IfFalseMBB);
17600 SelectMBBI->eraseFromParent();
17601 }
17602 SelectMBBI = Next;
17603 }
17604
17605 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
17606 return TailMBB;
17607}
17608
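// Expands PseudoVFROUND_NOEXCEPT_V_*_MASK by converting the source to integer
// and back (VFCVT_X_F then VFCVT_F_X) under the dynamic rounding mode, with
// FFLAGS saved and restored around the sequence so the conversions do not
// leave any exception flags visible (a brief summary of the code below).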
17609 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
17610 MachineBasicBlock *BB,
17611 unsigned CVTXOpc,
17612 unsigned CVTFOpc) {
17613 DebugLoc DL = MI.getDebugLoc();
17614
17616
17618 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17619
17620 // Save the old value of FFLAGS.
17621 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
17622
17623 assert(MI.getNumOperands() == 7);
17624
17625 // Emit a VFCVT_X_F
17626 const TargetRegisterInfo *TRI =
17628 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
17629 Register Tmp = MRI.createVirtualRegister(RC);
17630 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
17631 .add(MI.getOperand(1))
17632 .add(MI.getOperand(2))
17633 .add(MI.getOperand(3))
17634 .add(MachineOperand::CreateImm(7)) // frm = DYN
17635 .add(MI.getOperand(4))
17636 .add(MI.getOperand(5))
17637 .add(MI.getOperand(6))
17638 .add(MachineOperand::CreateReg(RISCV::FRM,
17639 /*IsDef*/ false,
17640 /*IsImp*/ true));
17641
17642 // Emit a VFCVT_F_X
17643 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
17644 .add(MI.getOperand(0))
17645 .add(MI.getOperand(1))
17646 .addReg(Tmp)
17647 .add(MI.getOperand(3))
17648 .add(MachineOperand::CreateImm(7)) // frm = DYN
17649 .add(MI.getOperand(4))
17650 .add(MI.getOperand(5))
17651 .add(MI.getOperand(6))
17652 .add(MachineOperand::CreateReg(RISCV::FRM,
17653 /*IsDef*/ false,
17654 /*IsImp*/ true));
17655
17656 // Restore FFLAGS.
17657 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
17658 .addReg(SavedFFLAGS, RegState::Kill);
17659
17660 // Erase the pseudoinstruction.
17661 MI.eraseFromParent();
17662 return BB;
17663}
17664
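// Expands PseudoFROUND_*: if |src| compares greater than or equal to the
// magnitude threshold in operand 2, the value is already integral (or NaN) and
// is passed through unchanged; otherwise it is rounded by converting to
// integer and back with the requested rounding mode and the original sign is
// restored (a brief summary of the expansion below).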
17665 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
17666 const RISCVSubtarget &Subtarget) {
17667 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
17668 const TargetRegisterClass *RC;
17669 switch (MI.getOpcode()) {
17670 default:
17671 llvm_unreachable("Unexpected opcode");
17672 case RISCV::PseudoFROUND_H:
17673 CmpOpc = RISCV::FLT_H;
17674 F2IOpc = RISCV::FCVT_W_H;
17675 I2FOpc = RISCV::FCVT_H_W;
17676 FSGNJOpc = RISCV::FSGNJ_H;
17677 FSGNJXOpc = RISCV::FSGNJX_H;
17678 RC = &RISCV::FPR16RegClass;
17679 break;
17680 case RISCV::PseudoFROUND_H_INX:
17681 CmpOpc = RISCV::FLT_H_INX;
17682 F2IOpc = RISCV::FCVT_W_H_INX;
17683 I2FOpc = RISCV::FCVT_H_W_INX;
17684 FSGNJOpc = RISCV::FSGNJ_H_INX;
17685 FSGNJXOpc = RISCV::FSGNJX_H_INX;
17686 RC = &RISCV::GPRF16RegClass;
17687 break;
17688 case RISCV::PseudoFROUND_S:
17689 CmpOpc = RISCV::FLT_S;
17690 F2IOpc = RISCV::FCVT_W_S;
17691 I2FOpc = RISCV::FCVT_S_W;
17692 FSGNJOpc = RISCV::FSGNJ_S;
17693 FSGNJXOpc = RISCV::FSGNJX_S;
17694 RC = &RISCV::FPR32RegClass;
17695 break;
17696 case RISCV::PseudoFROUND_S_INX:
17697 CmpOpc = RISCV::FLT_S_INX;
17698 F2IOpc = RISCV::FCVT_W_S_INX;
17699 I2FOpc = RISCV::FCVT_S_W_INX;
17700 FSGNJOpc = RISCV::FSGNJ_S_INX;
17701 FSGNJXOpc = RISCV::FSGNJX_S_INX;
17702 RC = &RISCV::GPRF32RegClass;
17703 break;
17704 case RISCV::PseudoFROUND_D:
17705 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17706 CmpOpc = RISCV::FLT_D;
17707 F2IOpc = RISCV::FCVT_L_D;
17708 I2FOpc = RISCV::FCVT_D_L;
17709 FSGNJOpc = RISCV::FSGNJ_D;
17710 FSGNJXOpc = RISCV::FSGNJX_D;
17711 RC = &RISCV::FPR64RegClass;
17712 break;
17713 case RISCV::PseudoFROUND_D_INX:
17714 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
17715 CmpOpc = RISCV::FLT_D_INX;
17716 F2IOpc = RISCV::FCVT_L_D_INX;
17717 I2FOpc = RISCV::FCVT_D_L_INX;
17718 FSGNJOpc = RISCV::FSGNJ_D_INX;
17719 FSGNJXOpc = RISCV::FSGNJX_D_INX;
17720 RC = &RISCV::GPRRegClass;
17721 break;
17722 }
17723
17724 const BasicBlock *BB = MBB->getBasicBlock();
17725 DebugLoc DL = MI.getDebugLoc();
17726 MachineFunction::iterator I = ++MBB->getIterator();
17727
17728 MachineFunction *F = MBB->getParent();
17729 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
17730 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
17731
17732 F->insert(I, CvtMBB);
17733 F->insert(I, DoneMBB);
17734 // Move all instructions after the sequence to DoneMBB.
17735 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
17736 MBB->end());
17737 // Update machine-CFG edges by transferring all successors of the current
17738 // block to the new block which will contain the Phi nodes for the selects.
17739 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
17740 // Set the successors for MBB.
17741 MBB->addSuccessor(CvtMBB);
17742 MBB->addSuccessor(DoneMBB);
17743
17744 Register DstReg = MI.getOperand(0).getReg();
17745 Register SrcReg = MI.getOperand(1).getReg();
17746 Register MaxReg = MI.getOperand(2).getReg();
17747 int64_t FRM = MI.getOperand(3).getImm();
17748
17749 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
17751
17752 Register FabsReg = MRI.createVirtualRegister(RC);
17753 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
17754
17755 // Compare the FP value to the max value.
17756 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17757 auto MIB =
17758 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
17759 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17760 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17761
17762 // Insert branch.
17763 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
17764 .addReg(CmpReg)
17765 .addReg(RISCV::X0)
17766 .addMBB(DoneMBB);
17767
17768 CvtMBB->addSuccessor(DoneMBB);
17769
17770 // Convert to integer.
17771 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
17772 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
17773 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17774 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17775
17776 // Convert back to FP.
17777 Register I2FReg = MRI.createVirtualRegister(RC);
17778 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
17779 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
17780 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
17781
17782 // Restore the sign bit.
17783 Register CvtReg = MRI.createVirtualRegister(RC);
17784 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
17785
17786 // Merge the results.
17787 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
17788 .addReg(SrcReg)
17789 .addMBB(MBB)
17790 .addReg(CvtReg)
17791 .addMBB(CvtMBB);
17792
17793 MI.eraseFromParent();
17794 return DoneMBB;
17795}
17796
17797 MachineBasicBlock *
17798 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
17799 MachineBasicBlock *BB) const {
17800 switch (MI.getOpcode()) {
17801 default:
17802 llvm_unreachable("Unexpected instr type to insert");
17803 case RISCV::ReadCounterWide:
17804 assert(!Subtarget.is64Bit() &&
17805 "ReadCounterWide is only to be used on riscv32");
17806 return emitReadCounterWidePseudo(MI, BB);
17807 case RISCV::Select_GPR_Using_CC_GPR:
17808 case RISCV::Select_FPR16_Using_CC_GPR:
17809 case RISCV::Select_FPR16INX_Using_CC_GPR:
17810 case RISCV::Select_FPR32_Using_CC_GPR:
17811 case RISCV::Select_FPR32INX_Using_CC_GPR:
17812 case RISCV::Select_FPR64_Using_CC_GPR:
17813 case RISCV::Select_FPR64INX_Using_CC_GPR:
17814 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
17815 return emitSelectPseudo(MI, BB, Subtarget);
17816 case RISCV::BuildPairF64Pseudo:
17817 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
17818 case RISCV::SplitF64Pseudo:
17819 return emitSplitF64Pseudo(MI, BB, Subtarget);
17820 case RISCV::PseudoQuietFLE_H:
17821 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
17822 case RISCV::PseudoQuietFLE_H_INX:
17823 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
17824 case RISCV::PseudoQuietFLT_H:
17825 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
17826 case RISCV::PseudoQuietFLT_H_INX:
17827 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
17828 case RISCV::PseudoQuietFLE_S:
17829 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
17830 case RISCV::PseudoQuietFLE_S_INX:
17831 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
17832 case RISCV::PseudoQuietFLT_S:
17833 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
17834 case RISCV::PseudoQuietFLT_S_INX:
17835 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
17836 case RISCV::PseudoQuietFLE_D:
17837 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
17838 case RISCV::PseudoQuietFLE_D_INX:
17839 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
17840 case RISCV::PseudoQuietFLE_D_IN32X:
17841 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
17842 Subtarget);
17843 case RISCV::PseudoQuietFLT_D:
17844 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
17845 case RISCV::PseudoQuietFLT_D_INX:
17846 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
17847 case RISCV::PseudoQuietFLT_D_IN32X:
17848 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
17849 Subtarget);
17850
17851 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
17852 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
17853 RISCV::PseudoVFCVT_F_X_V_M1_MASK);
17854 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
17855 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK,
17856 RISCV::PseudoVFCVT_F_X_V_M2_MASK);
17857 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
17858 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK,
17859 RISCV::PseudoVFCVT_F_X_V_M4_MASK);
17860 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
17861 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK,
17862 RISCV::PseudoVFCVT_F_X_V_M8_MASK);
17863 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
17864 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK,
17865 RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
17866 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
17867 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
17868 RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
17869 case RISCV::PseudoFROUND_H:
17870 case RISCV::PseudoFROUND_H_INX:
17871 case RISCV::PseudoFROUND_S:
17872 case RISCV::PseudoFROUND_S_INX:
17873 case RISCV::PseudoFROUND_D:
17874 case RISCV::PseudoFROUND_D_INX:
17875 case RISCV::PseudoFROUND_D_IN32X:
17876 return emitFROUND(MI, BB, Subtarget);
17877 case TargetOpcode::STATEPOINT:
17878 case TargetOpcode::STACKMAP:
17879 case TargetOpcode::PATCHPOINT:
17880 if (!Subtarget.is64Bit())
17881 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
17882 "supported on 64-bit targets");
17883 return emitPatchPoint(MI, BB);
17884 }
17885}
17886
17887 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
17888 SDNode *Node) const {
17889 // Add FRM dependency to any instructions with dynamic rounding mode.
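  // For example (illustrative): a scalar FADD_S selected with frm == DYN gets
  // an implicit use of the FRM CSR appended so later passes model its
  // dependence on the current rounding mode.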
17890 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
17891 if (Idx < 0) {
17892 // Vector pseudos have FRM index indicated by TSFlags.
17893 Idx = RISCVII::getFRMOpNum(MI.getDesc());
17894 if (Idx < 0)
17895 return;
17896 }
17897 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
17898 return;
17899 // If the instruction already reads FRM, don't add another read.
17900 if (MI.readsRegister(RISCV::FRM))
17901 return;
17902 MI.addOperand(
17903 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
17904}
17905
17906// Calling Convention Implementation.
17907// The expectations for frontend ABI lowering vary from target to target.
17908// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
17909// details, but this is a longer term goal. For now, we simply try to keep the
17910// role of the frontend as simple and well-defined as possible. The rules can
17911// be summarised as:
17912// * Never split up large scalar arguments. We handle them here.
17913// * If a hardfloat calling convention is being used, and the struct may be
17914// passed in a pair of registers (fp+fp, int+fp), and both registers are
17915// available, then pass as two separate arguments. If either the GPRs or FPRs
17916// are exhausted, then pass according to the rule below.
17917// * If a struct could never be passed in registers or directly in a stack
17918// slot (as it is larger than 2*XLEN and the floating point rules don't
17919// apply), then pass it using a pointer with the byval attribute.
17920// * If a struct is less than 2*XLEN, then coerce to either a two-element
17921// word-sized array or a 2*XLEN scalar (depending on alignment).
17922// * The frontend can determine whether a struct is returned by reference or
17923// not based on its size and fields. If it will be returned by reference, the
17924// frontend must modify the prototype so a pointer with the sret annotation is
17925// passed as the first argument. This is not necessary for large scalar
17926// returns.
17927// * Struct return values and varargs should be coerced to structs containing
17928// register-size fields in the same situations they would be for fixed
17929// arguments.
17930
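// As a rough illustration of the rules above (hypothetical C++ declarations,
// assuming a hard-float ILP32D/LP64D ABI with argument registers available):
// ```
// struct S1 { float a; float b; };  // fp+fp pair: passed as two FPR args
// struct S2 { int a; double b; };   // int+fp pair: passed as GPR + FPR
// struct S3 { char buf[64]; };      // larger than 2*XLEN, FP rules don't
//                                   // apply: passed byval via a pointer
// struct S4 { int a; int b; };      // not larger than 2*XLEN: coerced to a
//                                   // two-element word-sized array or a
//                                   // 2*XLEN scalar, depending on alignment
// ```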
17931static const MCPhysReg ArgFPR16s[] = {
17932 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
17933 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
17934};
17935static const MCPhysReg ArgFPR32s[] = {
17936 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
17937 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
17938};
17939static const MCPhysReg ArgFPR64s[] = {
17940 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
17941 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
17942};
17943// This is an interim calling convention and it may be changed in the future.
17944static const MCPhysReg ArgVRs[] = {
17945 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
17946 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
17947 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
17948static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
17949 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
17950 RISCV::V20M2, RISCV::V22M2};
17951static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
17952 RISCV::V20M4};
17953static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
17954
17955 static ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI) {
17956 // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
17957 // the ILP32E ABI.
17958 static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
17959 RISCV::X13, RISCV::X14, RISCV::X15,
17960 RISCV::X16, RISCV::X17};
17961 // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
17962 static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
17963 RISCV::X13, RISCV::X14, RISCV::X15};
17964
17965 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
17966 return ArrayRef(ArgEGPRs);
17967
17968 return ArrayRef(ArgIGPRs);
17969}
17970
17971 static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
17972 // The GPRs used for passing arguments in FastCC. X5 and X6 might be used
17973 // by the save/restore libcalls, so we don't use them here.
17974 static const MCPhysReg FastCCIGPRs[] = {
17975 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
17976 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
17977 RISCV::X29, RISCV::X30, RISCV::X31};
17978
17979 // The GPRs used for passing arguments in FastCC when using the ILP32E/LP64E ABIs.
17980 static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
17981 RISCV::X13, RISCV::X14, RISCV::X15,
17982 RISCV::X7};
17983
17984 if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
17985 return ArrayRef(FastCCEGPRs);
17986
17987 return ArrayRef(FastCCIGPRs);
17988}
17989
17990// Pass a 2*XLEN argument that has been split into two XLEN values through
17991// registers or the stack as necessary.
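// For example, on RV32 an i64 (or a soft-float double) argument is split into
// two i32 halves: both halves may land in a GPR pair, the first half may take
// the last free GPR with the second half going to the stack, or both halves
// may be passed on the stack.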
17992static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
17993 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
17994 MVT ValVT2, MVT LocVT2,
17995 ISD::ArgFlagsTy ArgFlags2, bool EABI) {
17996 unsigned XLenInBytes = XLen / 8;
17997 const RISCVSubtarget &STI =
17998 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
17999 ArrayRef<MCPhysReg> ArgGPRs = getArgGPRs(STI.getTargetABI());
18000
18001 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18002 // At least one half can be passed via register.
18003 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
18004 VA1.getLocVT(), CCValAssign::Full));
18005 } else {
18006 // Both halves must be passed on the stack, with proper alignment.
18007 // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
18008 // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
18009 Align StackAlign(XLenInBytes);
18010 if (!EABI || XLen != 32)
18011 StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
18012 State.addLoc(
18013 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
18014 State.AllocateStack(XLenInBytes, StackAlign),
18015 VA1.getLocVT(), CCValAssign::Full));
18016 State.addLoc(CCValAssign::getMem(
18017 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18018 LocVT2, CCValAssign::Full));
18019 return false;
18020 }
18021
18022 if (Register Reg = State.AllocateReg(ArgGPRs)) {
18023 // The second half can also be passed via register.
18024 State.addLoc(
18025 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
18026 } else {
18027 // The second half is passed via the stack, without additional alignment.
18028 State.addLoc(CCValAssign::getMem(
18029 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
18030 LocVT2, CCValAssign::Full));
18031 }
18032
18033 return false;
18034}
18035
18036static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
18037 std::optional<unsigned> FirstMaskArgument,
18038 CCState &State, const RISCVTargetLowering &TLI) {
18039 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
18040 if (RC == &RISCV::VRRegClass) {
18041 // Assign the first mask argument to V0.
18042 // This is an interim calling convention and it may be changed in the
18043 // future.
18044 if (FirstMaskArgument && ValNo == *FirstMaskArgument)
18045 return State.AllocateReg(RISCV::V0);
18046 return State.AllocateReg(ArgVRs);
18047 }
18048 if (RC == &RISCV::VRM2RegClass)
18049 return State.AllocateReg(ArgVRM2s);
18050 if (RC == &RISCV::VRM4RegClass)
18051 return State.AllocateReg(ArgVRM4s);
18052 if (RC == &RISCV::VRM8RegClass)
18053 return State.AllocateReg(ArgVRM8s);
18054 llvm_unreachable("Unhandled register class for ValueType");
18055}
18056
18057// Implements the RISC-V calling convention. Returns true upon failure.
18058bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
18059 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
18060 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
18061 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
18062 std::optional<unsigned> FirstMaskArgument) {
18063 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
18064 assert(XLen == 32 || XLen == 64);
18065 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
18066
18067 // Static chain parameter must not be passed in normal argument registers,
18068 // so we assign t2 for it as done in GCC's __builtin_call_with_static_chain
18069 if (ArgFlags.isNest()) {
18070 if (unsigned Reg = State.AllocateReg(RISCV::X7)) {
18071 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18072 return false;
18073 }
18074 }
18075
18076 // Any return value split into more than two values can't be returned
18077 // directly. Vectors are returned via the available vector registers.
18078 if (!LocVT.isVector() && IsRet && ValNo > 1)
18079 return true;
18080
18081 // UseGPRForF16_F32 is true if targeting one of the soft-float ABIs, if
18082 // passing a variadic argument, or if no F16/F32 argument registers are available.
18083 bool UseGPRForF16_F32 = true;
18084 // UseGPRForF64 is true if targeting soft-float ABIs or an FLEN=32 ABI, if
18085 // passing a variadic argument, or if no F64 argument registers are available.
18086 bool UseGPRForF64 = true;
18087
18088 switch (ABI) {
18089 default:
18090 llvm_unreachable("Unexpected ABI");
18091 case RISCVABI::ABI_ILP32:
18092 case RISCVABI::ABI_ILP32E:
18093 case RISCVABI::ABI_LP64:
18094 case RISCVABI::ABI_LP64E:
18095 break;
18096 case RISCVABI::ABI_ILP32F:
18097 case RISCVABI::ABI_LP64F:
18098 UseGPRForF16_F32 = !IsFixed;
18099 break;
18100 case RISCVABI::ABI_ILP32D:
18101 case RISCVABI::ABI_LP64D:
18102 UseGPRForF16_F32 = !IsFixed;
18103 UseGPRForF64 = !IsFixed;
18104 break;
18105 }
18106
18107 // FPR16, FPR32, and FPR64 alias each other.
18108 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s)) {
18109 UseGPRForF16_F32 = true;
18110 UseGPRForF64 = true;
18111 }
18112
18113 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
18114 // similar local variables rather than directly checking against the target
18115 // ABI.
18116
18117 if (UseGPRForF16_F32 &&
18118 (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
18119 LocVT = XLenVT;
18120 LocInfo = CCValAssign::BCvt;
18121 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
18122 LocVT = MVT::i64;
18123 LocInfo = CCValAssign::BCvt;
18124 }
18125
18126 ArrayRef<MCPhysReg> ArgGPRs = getArgGPRs(ABI);
18127
18128 // If this is a variadic argument, the RISC-V calling convention requires
18129 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
18130 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
18131 // be used regardless of whether the original argument was split during
18132 // legalisation or not. The argument will not be passed by registers if the
18133 // original type is larger than 2*XLEN, so the register alignment rule does
18134 // not apply.
18135 // TODO: To be compatible with GCC's behaviors, we currently don't align
18136 // registers when using the ILP32E calling convention. This behavior may be
18137 // changed when RV32E/ILP32E is ratified.
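  // For example (illustrative), on RV32 a variadic double whose next free
  // register would be a3 (an 'odd' register) instead skips a3 and is assigned
  // the aligned pair a4/a5.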
18138 unsigned TwoXLenInBytes = (2 * XLen) / 8;
18139 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
18140 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
18141 ABI != RISCVABI::ABI_ILP32E) {
18142 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
18143 // Skip 'odd' register if necessary.
18144 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
18145 State.AllocateReg(ArgGPRs);
18146 }
18147
18148 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
18149 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
18150 State.getPendingArgFlags();
18151
18152 assert(PendingLocs.size() == PendingArgFlags.size() &&
18153 "PendingLocs and PendingArgFlags out of sync");
18154
18155 // Handle passing f64 on RV32D with a soft float ABI or when floating point
18156 // registers are exhausted.
18157 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
18158 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
18159 // Depending on available argument GPRS, f64 may be passed in a pair of
18160 // GPRs, split between a GPR and the stack, or passed completely on the
18161 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
18162 // cases.
18163 Register Reg = State.AllocateReg(ArgGPRs);
18164 if (!Reg) {
18165 unsigned StackOffset = State.AllocateStack(8, Align(8));
18166 State.addLoc(
18167 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18168 return false;
18169 }
18170 LocVT = MVT::i32;
18171 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18172 Register HiReg = State.AllocateReg(ArgGPRs);
18173 if (HiReg) {
18174 State.addLoc(
18175 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
18176 } else {
18177 unsigned StackOffset = State.AllocateStack(4, Align(4));
18178 State.addLoc(
18179 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18180 }
18181 return false;
18182 }
18183
18184 // Fixed-length vectors are located in the corresponding scalable-vector
18185 // container types.
18186 if (ValVT.isFixedLengthVector())
18187 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18188
18189 // Split arguments might be passed indirectly, so keep track of the pending
18190 // values. Split vectors are passed via a mix of registers and indirectly, so
18191 // treat them as we would any other argument.
18192 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
18193 LocVT = XLenVT;
18194 LocInfo = CCValAssign::Indirect;
18195 PendingLocs.push_back(
18196 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
18197 PendingArgFlags.push_back(ArgFlags);
18198 if (!ArgFlags.isSplitEnd()) {
18199 return false;
18200 }
18201 }
18202
18203 // If the split argument only had two elements, it should be passed directly
18204 // in registers or on the stack.
18205 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
18206 PendingLocs.size() <= 2) {
18207 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
18208 // Apply the normal calling convention rules to the first half of the
18209 // split argument.
18210 CCValAssign VA = PendingLocs[0];
18211 ISD::ArgFlagsTy AF = PendingArgFlags[0];
18212 PendingLocs.clear();
18213 PendingArgFlags.clear();
18214 return CC_RISCVAssign2XLen(
18215 XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
18216 ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
18217 }
18218
18219 // Allocate to a register if possible, or else a stack slot.
18220 Register Reg;
18221 unsigned StoreSizeBytes = XLen / 8;
18222 Align StackAlign = Align(XLen / 8);
18223
18224 if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
18225 Reg = State.AllocateReg(ArgFPR16s);
18226 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
18227 Reg = State.AllocateReg(ArgFPR32s);
18228 else if (ValVT == MVT::f64 && !UseGPRForF64)
18229 Reg = State.AllocateReg(ArgFPR64s);
18230 else if (ValVT.isVector()) {
18231 Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
18232 if (!Reg) {
18233 // For return values, the vector must be passed fully via registers or
18234 // via the stack.
18235 // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
18236 // but we're using all of them.
18237 if (IsRet)
18238 return true;
18239 // Try using a GPR to pass the address
18240 if ((Reg = State.AllocateReg(ArgGPRs))) {
18241 LocVT = XLenVT;
18242 LocInfo = CCValAssign::Indirect;
18243 } else if (ValVT.isScalableVector()) {
18244 LocVT = XLenVT;
18245 LocInfo = CCValAssign::Indirect;
18246 } else {
18247 // Pass fixed-length vectors on the stack.
18248 LocVT = ValVT;
18249 StoreSizeBytes = ValVT.getStoreSize();
18250 // Align vectors to their element sizes, being careful for vXi1
18251 // vectors.
18252 StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
18253 }
18254 }
18255 } else {
18256 Reg = State.AllocateReg(ArgGPRs);
18257 }
18258
18259 unsigned StackOffset =
18260 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
18261
18262 // If we reach this point and PendingLocs is non-empty, we must be at the
18263 // end of a split argument that must be passed indirectly.
18264 if (!PendingLocs.empty()) {
18265 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
18266 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
18267
18268 for (auto &It : PendingLocs) {
18269 if (Reg)
18270 It.convertToReg(Reg);
18271 else
18272 It.convertToMem(StackOffset);
18273 State.addLoc(It);
18274 }
18275 PendingLocs.clear();
18276 PendingArgFlags.clear();
18277 return false;
18278 }
18279
18280 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
18281 (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
18282 "Expected an XLenVT or vector types at this stage");
18283
18284 if (Reg) {
18285 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18286 return false;
18287 }
18288
18289 // When a scalar floating-point value is passed on the stack, no
18290 // bit-conversion is needed.
18291 if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
18292 assert(!ValVT.isVector());
18293 LocVT = ValVT;
18294 LocInfo = CCValAssign::Full;
18295 }
18296 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18297 return false;
18298}
18299
18300template <typename ArgTy>
18301static std::optional<unsigned> preAssignMask(const ArgTy &Args) {
18302 for (const auto &ArgIdx : enumerate(Args)) {
18303 MVT ArgVT = ArgIdx.value().VT;
18304 if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
18305 return ArgIdx.index();
18306 }
18307 return std::nullopt;
18308}
18309
18310void RISCVTargetLowering::analyzeInputArgs(
18311 MachineFunction &MF, CCState &CCInfo,
18312 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
18313 RISCVCCAssignFn Fn) const {
18314 unsigned NumArgs = Ins.size();
18315 FunctionType *FType = MF.getFunction().getFunctionType();
18316
18317 std::optional<unsigned> FirstMaskArgument;
18318 if (Subtarget.hasVInstructions())
18319 FirstMaskArgument = preAssignMask(Ins);
18320
18321 for (unsigned i = 0; i != NumArgs; ++i) {
18322 MVT ArgVT = Ins[i].VT;
18323 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
18324
18325 Type *ArgTy = nullptr;
18326 if (IsRet)
18327 ArgTy = FType->getReturnType();
18328 else if (Ins[i].isOrigArg())
18329 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
18330
18331 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18332 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18333 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
18334 FirstMaskArgument)) {
18335 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
18336 << ArgVT << '\n');
18337 llvm_unreachable(nullptr);
18338 }
18339 }
18340}
18341
18342void RISCVTargetLowering::analyzeOutputArgs(
18343 MachineFunction &MF, CCState &CCInfo,
18344 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
18345 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
18346 unsigned NumArgs = Outs.size();
18347
18348 std::optional<unsigned> FirstMaskArgument;
18349 if (Subtarget.hasVInstructions())
18350 FirstMaskArgument = preAssignMask(Outs);
18351
18352 for (unsigned i = 0; i != NumArgs; i++) {
18353 MVT ArgVT = Outs[i].VT;
18354 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
18355 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
18356
18357 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
18358 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
18359 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
18360 FirstMaskArgument)) {
18361 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
18362 << ArgVT << "\n");
18363 llvm_unreachable(nullptr);
18364 }
18365 }
18366}
18367
18368// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
18369// values.
18370 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
18371 const CCValAssign &VA, const SDLoc &DL,
18372 const RISCVSubtarget &Subtarget) {
18373 switch (VA.getLocInfo()) {
18374 default:
18375 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18376 case CCValAssign::Full:
18378 Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
18379 break;
18380 case CCValAssign::BCvt:
18381 if (VA.getLocVT().isInteger() &&
18382 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18383 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
18384 } else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) {
18385 if (RV64LegalI32) {
18386 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Val);
18387 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
18388 } else {
18389 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
18390 }
18391 } else {
18392 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
18393 }
18394 break;
18395 }
18396 return Val;
18397}
18398
18399// The caller is responsible for loading the full value if the argument is
18400// passed with CCValAssign::Indirect.
18401 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
18402 const CCValAssign &VA, const SDLoc &DL,
18403 const ISD::InputArg &In,
18404 const RISCVTargetLowering &TLI) {
18405 MachineFunction &MF = DAG.getMachineFunction();
18406 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18407 EVT LocVT = VA.getLocVT();
18408 SDValue Val;
18409 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
18410 Register VReg = RegInfo.createVirtualRegister(RC);
18411 RegInfo.addLiveIn(VA.getLocReg(), VReg);
18412 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
18413
18414 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
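  // For example (illustrative): an i32 signext argument on RV64 already
  // arrives sign extended in its GPR, so recording the vreg lets the sext.w
  // removal machinery drop redundant sign-extension instructions later.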
18415 if (In.isOrigArg()) {
18416 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
18417 if (OrigArg->getType()->isIntegerTy()) {
18418 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
18419 // An input zero extended from a type narrower than i32 (e.g. i31) can also be considered sign extended.
18420 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
18421 (BitWidth < 32 && In.Flags.isZExt())) {
18423 RVFI->addSExt32Register(VReg);
18424 }
18425 }
18426 }
18427
18429 if (VA.getLocInfo() == CCValAssign::Indirect)
18430
18431 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
18432}
18433
18434 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
18435 const CCValAssign &VA, const SDLoc &DL,
18436 const RISCVSubtarget &Subtarget) {
18437 EVT LocVT = VA.getLocVT();
18438
18439 switch (VA.getLocInfo()) {
18440 default:
18441 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18442 case CCValAssign::Full:
18443 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
18444 Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
18445 break;
18446 case CCValAssign::BCvt:
18447 if (LocVT.isInteger() &&
18448 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
18449 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
18450 } else if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32) {
18451 if (RV64LegalI32) {
18452 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
18453 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val);
18454 } else {
18455 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
18456 }
18457 } else {
18458 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
18459 }
18460 break;
18461 }
18462 return Val;
18463}
18464
18465// The caller is responsible for loading the full value if the argument is
18466// passed with CCValAssign::Indirect.
18467 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
18468 const CCValAssign &VA, const SDLoc &DL) {
18469 MachineFunction &MF = DAG.getMachineFunction();
18470 MachineFrameInfo &MFI = MF.getFrameInfo();
18471 EVT LocVT = VA.getLocVT();
18472 EVT ValVT = VA.getValVT();
18474 if (ValVT.isScalableVector()) {
18475 // When the value is a scalable vector, we save the pointer which points to
18476 // the scalable vector value in the stack. The ValVT will be the pointer
18477 // type, instead of the scalable vector type.
18478 ValVT = LocVT;
18479 }
18480 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
18481 /*IsImmutable=*/true);
18482 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
18483 SDValue Val;
18484
18485 ISD::LoadExtType ExtType;
18486 switch (VA.getLocInfo()) {
18487 default:
18488 llvm_unreachable("Unexpected CCValAssign::LocInfo");
18489 case CCValAssign::Full:
18491 case CCValAssign::BCvt:
18492 ExtType = ISD::NON_EXTLOAD;
18493 break;
18494 }
18495 Val = DAG.getExtLoad(
18496 ExtType, DL, LocVT, Chain, FIN,
18497 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
18498 return Val;
18499}
18500
18501 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
18502 const CCValAssign &VA,
18503 const CCValAssign &HiVA,
18504 const SDLoc &DL) {
18505 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
18506 "Unexpected VA");
18507 MachineFunction &MF = DAG.getMachineFunction();
18508 MachineFrameInfo &MFI = MF.getFrameInfo();
18509 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18510
18511 assert(VA.isRegLoc() && "Expected register VA assignment");
18512
18513 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18514 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
18515 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
18516 SDValue Hi;
18517 if (HiVA.isMemLoc()) {
18518 // Second half of f64 is passed on the stack.
18519 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
18520 /*IsImmutable=*/true);
18521 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
18522 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
18523 MachinePointerInfo::getFixedStack(MF, FI));
18524 } else {
18525 // Second half of f64 is passed in another GPR.
18526 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18527 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
18528 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
18529 }
18530 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
18531}
18532
18533 // FastCC shows less than a 1% performance improvement on some particular
18534 // benchmarks, but it may theoretically be beneficial in some cases.
18535 bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
18536 unsigned ValNo, MVT ValVT, MVT LocVT,
18537 CCValAssign::LocInfo LocInfo,
18538 ISD::ArgFlagsTy ArgFlags, CCState &State,
18539 bool IsFixed, bool IsRet, Type *OrigTy,
18540 const RISCVTargetLowering &TLI,
18541 std::optional<unsigned> FirstMaskArgument) {
18542 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18543 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18544 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18545 return false;
18546 }
18547 }
18548
18549 const RISCVSubtarget &Subtarget = TLI.getSubtarget();
18550
18551 if (LocVT == MVT::f16 &&
18552 (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
18553 static const MCPhysReg FPR16List[] = {
18554 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
18555 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
18556 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
18557 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
18558 if (unsigned Reg = State.AllocateReg(FPR16List)) {
18559 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18560 return false;
18561 }
18562 }
18563
18564 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18565 static const MCPhysReg FPR32List[] = {
18566 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
18567 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
18568 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
18569 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
18570 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18571 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18572 return false;
18573 }
18574 }
18575
18576 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18577 static const MCPhysReg FPR64List[] = {
18578 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
18579 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
18580 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
18581 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
18582 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18583 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18584 return false;
18585 }
18586 }
18587
18588 // Check if there is an available GPR before hitting the stack.
18589 if ((LocVT == MVT::f16 &&
18590 (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
18591 (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18592 (LocVT == MVT::f64 && Subtarget.is64Bit() &&
18593 Subtarget.hasStdExtZdinx())) {
18594 if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18595 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18596 return false;
18597 }
18598 }
18599
18600 if (LocVT == MVT::f16) {
18601 unsigned Offset2 = State.AllocateStack(2, Align(2));
18602 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
18603 return false;
18604 }
18605
18606 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
18607 unsigned Offset4 = State.AllocateStack(4, Align(4));
18608 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
18609 return false;
18610 }
18611
18612 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
18613 unsigned Offset5 = State.AllocateStack(8, Align(8));
18614 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
18615 return false;
18616 }
18617
18618 if (LocVT.isVector()) {
18619 if (unsigned Reg =
18620 allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
18621 // Fixed-length vectors are located in the corresponding scalable-vector
18622 // container types.
18623 if (ValVT.isFixedLengthVector())
18624 LocVT = TLI.getContainerForFixedLengthVector(LocVT);
18625 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18626 } else {
18627 // Try and pass the address via a "fast" GPR.
18628 if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
18629 LocInfo = CCValAssign::Indirect;
18630 LocVT = TLI.getSubtarget().getXLenVT();
18631 State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
18632 } else if (ValVT.isFixedLengthVector()) {
18633 auto StackAlign =
18635 unsigned StackOffset =
18636 State.AllocateStack(ValVT.getStoreSize(), StackAlign);
18637 State.addLoc(
18638 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
18639 } else {
18640 // Can't pass scalable vectors on the stack.
18641 return true;
18642 }
18643 }
18644
18645 return false;
18646 }
18647
18648 return true; // CC didn't match.
18649}
18650
18651bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
18652 CCValAssign::LocInfo LocInfo,
18653 ISD::ArgFlagsTy ArgFlags, CCState &State) {
18654 if (ArgFlags.isNest()) {
18655 report_fatal_error(
18656 "Attribute 'nest' is not supported in GHC calling convention");
18657 }
18658
18659 static const MCPhysReg GPRList[] = {
18660 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
18661 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
18662
18663 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
18664 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
18665 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
18666 if (unsigned Reg = State.AllocateReg(GPRList)) {
18667 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18668 return false;
18669 }
18670 }
18671
18672 const RISCVSubtarget &Subtarget =
18673 State.getMachineFunction().getSubtarget<RISCVSubtarget>();
18674
18675 if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
18676 // Pass in STG registers: F1, ..., F6
18677 // fs0 ... fs5
18678 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
18679 RISCV::F18_F, RISCV::F19_F,
18680 RISCV::F20_F, RISCV::F21_F};
18681 if (unsigned Reg = State.AllocateReg(FPR32List)) {
18682 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18683 return false;
18684 }
18685 }
18686
18687 if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
18688 // Pass in STG registers: D1, ..., D6
18689 // fs6 ... fs11
18690 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
18691 RISCV::F24_D, RISCV::F25_D,
18692 RISCV::F26_D, RISCV::F27_D};
18693 if (unsigned Reg = State.AllocateReg(FPR64List)) {
18694 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18695 return false;
18696 }
18697 }
18698
18699 if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
18700 (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
18701 Subtarget.is64Bit())) {
18702 if (unsigned Reg = State.AllocateReg(GPRList)) {
18703 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
18704 return false;
18705 }
18706 }
18707
18708 report_fatal_error("No registers left in GHC calling convention");
18709 return true;
18710}
18711
18712// Transform physical registers into virtual registers.
18713SDValue RISCVTargetLowering::LowerFormalArguments(
18714 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
18715 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
18716 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
18717
18718 MachineFunction &MF = DAG.getMachineFunction();
18719
18720 switch (CallConv) {
18721 default:
18722 report_fatal_error("Unsupported calling convention");
18723 case CallingConv::C:
18724 case CallingConv::Fast:
18725 case CallingConv::SPIR_KERNEL:
18726 case CallingConv::GRAAL:
18727 break;
18728 case CallingConv::GHC:
18729 if (Subtarget.isRVE())
18730 report_fatal_error("GHC calling convention is not supported on RVE!");
18731 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
18732 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
18733 "(Zdinx/D) instruction set extensions");
18734 }
18735
18736 const Function &Func = MF.getFunction();
18737 if (Func.hasFnAttribute("interrupt")) {
18738 if (!Func.arg_empty())
18739 report_fatal_error(
18740 "Functions with the interrupt attribute cannot have arguments!");
18741
18742 StringRef Kind =
18743 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
18744
18745 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
18746 report_fatal_error(
18747 "Function interrupt attribute argument not supported!");
18748 }
18749
18750 EVT PtrVT = getPointerTy(DAG.getDataLayout());
18751 MVT XLenVT = Subtarget.getXLenVT();
18752 unsigned XLenInBytes = Subtarget.getXLen() / 8;
18753 // Used with varargs to accumulate store chains.
18754 std::vector<SDValue> OutChains;
18755
18756 // Assign locations to all of the incoming arguments.
18757 SmallVector<CCValAssign, 16> ArgLocs;
18758 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
18759
18760 if (CallConv == CallingConv::GHC)
18761 CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
18762 else
18763 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
18764 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
18765 : RISCV::CC_RISCV);
18766
18767 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
18768 CCValAssign &VA = ArgLocs[i];
18769 SDValue ArgValue;
18770 // Passing f64 on RV32D with a soft float ABI must be handled as a special
18771 // case.
18772 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
18773 assert(VA.needsCustom());
18774 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
18775 } else if (VA.isRegLoc())
18776 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
18777 else
18778 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
18779
18780 if (VA.getLocInfo() == CCValAssign::Indirect) {
18781 // If the original argument was split and passed by reference (e.g. i128
18782 // on RV32), we need to load all parts of it here (using the same
18783 // address). Vectors may be partly split to registers and partly to the
18784 // stack, in which case the base address is partly offset and subsequent
18785 // stores are relative to that.
18786 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
18787 MachinePointerInfo()));
18788 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
18789 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
18790 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
18791 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
18792 CCValAssign &PartVA = ArgLocs[i + 1];
18793 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
18794 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
18795 if (PartVA.getValVT().isScalableVector())
18796 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
18797 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
18798 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
18799 MachinePointerInfo()));
18800 ++i;
18801 ++InsIdx;
18802 }
18803 continue;
18804 }
18805 InVals.push_back(ArgValue);
18806 }
18807
18808 if (any_of(ArgLocs,
18809 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
18810 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
18811
18812 if (IsVarArg) {
18813 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
18814 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
18815 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
18816 MachineFrameInfo &MFI = MF.getFrameInfo();
18817 MachineRegisterInfo &RegInfo = MF.getRegInfo();
18818 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
18819
18820 // Size of the vararg save area. For now, the varargs save area is either
18821 // zero or large enough to hold a0-a7.
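// As a rough worked example (illustrative, assuming the usual convention that
// a0-a7 carry the first integer arguments): for a variadic callee such as
//   int sum(int n, ...);
// only a0 is consumed by the named argument, so Idx == 1 and
// VarArgsSaveSize == XLenInBytes * 7; a1-a7 are then spilled below so that
// va_arg can walk every potential register vararg in memory.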
18822 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
18823 int FI;
18824
18825 // If all registers are allocated, then all varargs must be passed on the
18826 // stack and we don't need to save any argregs.
18827 if (VarArgsSaveSize == 0) {
18828 int VaArgOffset = CCInfo.getStackSize();
18829 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
18830 } else {
18831 int VaArgOffset = -VarArgsSaveSize;
18832 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
18833
18834 // If saving an odd number of registers then create an extra stack slot to
18835 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
18836 // offsets to even-numbered registers remain 2*XLEN-aligned.
18837 if (Idx % 2) {
18838 MFI.CreateFixedObject(
18839 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
18840 VarArgsSaveSize += XLenInBytes;
18841 }
18842
18843 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
18844
18845 // Copy the integer registers that may have been used for passing varargs
18846 // to the vararg save area.
18847 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
18848 const Register Reg = RegInfo.createVirtualRegister(RC);
18849 RegInfo.addLiveIn(ArgRegs[I], Reg);
18850 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
18851 SDValue Store = DAG.getStore(
18852 Chain, DL, ArgValue, FIN,
18853 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
18854 OutChains.push_back(Store);
18855 FIN =
18856 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
18857 }
18858 }
18859
18860 // Record the frame index of the first variable argument,
18861 // which is needed by the VASTART node.
18862 RVFI->setVarArgsFrameIndex(FI);
18863 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
18864 }
18865
18866 // All stores are grouped in one node to allow the matching between
18867 // the size of Ins and InVals. This only happens for vararg functions.
18868 if (!OutChains.empty()) {
18869 OutChains.push_back(Chain);
18870 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
18871 }
18872
18873 return Chain;
18874}
18875
18876/// isEligibleForTailCallOptimization - Check whether the call is eligible
18877/// for tail call optimization.
18878/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
18879bool RISCVTargetLowering::isEligibleForTailCallOptimization(
18880 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
18881 const SmallVector<CCValAssign, 16> &ArgLocs) const {
18882
18883 auto CalleeCC = CLI.CallConv;
18884 auto &Outs = CLI.Outs;
18885 auto &Caller = MF.getFunction();
18886 auto CallerCC = Caller.getCallingConv();
18887
18888 // Exception-handling functions need a special set of instructions to
18889 // indicate a return to the hardware. Tail-calling another function would
18890 // probably break this.
18891 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
18892 // should be expanded as new function attributes are introduced.
18893 if (Caller.hasFnAttribute("interrupt"))
18894 return false;
18895
18896 // Do not tail call opt if the stack is used to pass parameters.
18897 if (CCInfo.getStackSize() != 0)
18898 return false;
18899
18900 // Do not tail call opt if any parameters need to be passed indirectly.
18901 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
18902 // passed indirectly. So the address of the value will be passed in a
18903 // register, or if not available, then the address is put on the stack. In
18904 // order to pass indirectly, space on the stack often needs to be allocated
18905 // in order to store the value. In this case the CCInfo.getStackSize()
18906 // != 0 check is not enough and we need to check if any CCValAssign ArgLocs
18907 // are passed CCValAssign::Indirect.
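// Illustrative example: an i128 argument on RV32 is wider than 2*XLEN, so it
// is assigned CCValAssign::Indirect and needs a caller-owned stack slot for
// its value; such a call is rejected here even though getStackSize() can
// still be zero.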
18908 for (auto &VA : ArgLocs)
18909 if (VA.getLocInfo() == CCValAssign::Indirect)
18910 return false;
18911
18912 // Do not tail call opt if either caller or callee uses struct return
18913 // semantics.
18914 auto IsCallerStructRet = Caller.hasStructRetAttr();
18915 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
18916 if (IsCallerStructRet || IsCalleeStructRet)
18917 return false;
18918
18919 // The callee has to preserve all registers the caller needs to preserve.
18920 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
18921 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
18922 if (CalleeCC != CallerCC) {
18923 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
18924 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
18925 return false;
18926 }
18927
18928 // Byval parameters hand the function a pointer directly into the stack area
18929 // we want to reuse during a tail call. Working around this *is* possible
18930 // but less efficient and uglier in LowerCall.
18931 for (auto &Arg : Outs)
18932 if (Arg.Flags.isByVal())
18933 return false;
18934
18935 return true;
18936}
18937
18938static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
18939 return DAG.getDataLayout().getPrefTypeAlign(
18940 VT.getTypeForEVT(*DAG.getContext()));
18941}
18942
18943// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
18944// and output parameter nodes.
18945SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
18946 SmallVectorImpl<SDValue> &InVals) const {
18947 SelectionDAG &DAG = CLI.DAG;
18948 SDLoc &DL = CLI.DL;
18949 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
18950 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
18951 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
18952 SDValue Chain = CLI.Chain;
18953 SDValue Callee = CLI.Callee;
18954 bool &IsTailCall = CLI.IsTailCall;
18955 CallingConv::ID CallConv = CLI.CallConv;
18956 bool IsVarArg = CLI.IsVarArg;
18957 EVT PtrVT = getPointerTy(DAG.getDataLayout());
18958 MVT XLenVT = Subtarget.getXLenVT();
18959
18960 MachineFunction &MF = DAG.getMachineFunction();
18961
18962 // Analyze the operands of the call, assigning locations to each operand.
18963 SmallVector<CCValAssign, 16> ArgLocs;
18964 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
18965
18966 if (CallConv == CallingConv::GHC) {
18967 if (Subtarget.isRVE())
18968 report_fatal_error("GHC calling convention is not supported on RVE!");
18969 ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
18970 } else
18971 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
18972 CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
18973 : RISCV::CC_RISCV);
18974
18975 // Check if it's really possible to do a tail call.
18976 if (IsTailCall)
18977 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
18978
18979 if (IsTailCall)
18980 ++NumTailCalls;
18981 else if (CLI.CB && CLI.CB->isMustTailCall())
18982 report_fatal_error("failed to perform tail call elimination on a call "
18983 "site marked musttail");
18984
18985 // Get a count of how many bytes are to be pushed on the stack.
18986 unsigned NumBytes = ArgCCInfo.getStackSize();
18987
18988 // Create local copies for byval args
18989 SmallVector<SDValue, 8> ByValArgs;
18990 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
18991 ISD::ArgFlagsTy Flags = Outs[i].Flags;
18992 if (!Flags.isByVal())
18993 continue;
18994
18995 SDValue Arg = OutVals[i];
18996 unsigned Size = Flags.getByValSize();
18997 Align Alignment = Flags.getNonZeroByValAlign();
18998
18999 int FI =
19000 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
19001 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
19002 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
19003
19004 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
19005 /*IsVolatile=*/false,
19006 /*AlwaysInline=*/false, IsTailCall,
19007 MachinePointerInfo(), MachinePointerInfo());
19008 ByValArgs.push_back(FIPtr);
19009 }
19010
19011 if (!IsTailCall)
19012 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
19013
19014 // Copy argument values to their designated locations.
19015 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
19016 SmallVector<SDValue, 8> MemOpChains;
19017 SDValue StackPtr;
19018 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
19019 ++i, ++OutIdx) {
19020 CCValAssign &VA = ArgLocs[i];
19021 SDValue ArgValue = OutVals[OutIdx];
19022 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
19023
19024 // Handle passing f64 on RV32D with a soft float ABI as a special case.
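// As an illustration (assuming the ilp32 soft-float ABI): for a call such as
//   void takes_double(double d);   // hypothetical callee
// SplitF64 below produces the low and high 32-bit halves of d; the low half
// travels in the first GPR of the assigned pair and the high half either in
// the next GPR or, as handled further down, in a stack slot when only one
// register remains.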
19025 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19026 assert(VA.isRegLoc() && "Expected register VA assignment");
19027 assert(VA.needsCustom());
19028 SDValue SplitF64 = DAG.getNode(
19029 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
19030 SDValue Lo = SplitF64.getValue(0);
19031 SDValue Hi = SplitF64.getValue(1);
19032
19033 Register RegLo = VA.getLocReg();
19034 RegsToPass.push_back(std::make_pair(RegLo, Lo));
19035
19036 // Get the CCValAssign for the Hi part.
19037 CCValAssign &HiVA = ArgLocs[++i];
19038
19039 if (HiVA.isMemLoc()) {
19040 // Second half of f64 is passed on the stack.
19041 if (!StackPtr.getNode())
19042 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19043 SDValue Address =
19044 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19045 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
19046 // Emit the store.
19047 MemOpChains.push_back(
19048 DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
19049 } else {
19050 // Second half of f64 is passed in another GPR.
19051 Register RegHigh = HiVA.getLocReg();
19052 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
19053 }
19054 continue;
19055 }
19056
19057 // Promote the value if needed.
19058 // For now, only handle fully promoted and indirect arguments.
19059 if (VA.getLocInfo() == CCValAssign::Indirect) {
19060 // Store the argument in a stack slot and pass its address.
19061 Align StackAlign =
19062 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
19063 getPrefTypeAlign(ArgValue.getValueType(), DAG));
19064 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
19065 // If the original argument was split (e.g. i128), we need
19066 // to store the required parts of it here (and pass just one address).
19067 // Vectors may be partly split to registers and partly to the stack, in
19068 // which case the base address is partly offset and subsequent stores are
19069 // relative to that.
19070 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
19071 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
19072 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19073 // Calculate the total size to store. We don't have access to what we're
19074 // actually storing other than performing the loop and collecting the
19075 // info.
19076 SmallVector<std::pair<SDValue, SDValue>> Parts;
19077 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
19078 SDValue PartValue = OutVals[OutIdx + 1];
19079 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
19080 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19081 EVT PartVT = PartValue.getValueType();
19082 if (PartVT.isScalableVector())
19083 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19084 StoredSize += PartVT.getStoreSize();
19085 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
19086 Parts.push_back(std::make_pair(PartValue, Offset));
19087 ++i;
19088 ++OutIdx;
19089 }
19090 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
19091 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
19092 MemOpChains.push_back(
19093 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
19094 MachinePointerInfo::getFixedStack(MF, FI)));
19095 for (const auto &Part : Parts) {
19096 SDValue PartValue = Part.first;
19097 SDValue PartOffset = Part.second;
19098 SDValue Address =
19099 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
19100 MemOpChains.push_back(
19101 DAG.getStore(Chain, DL, PartValue, Address,
19102 MachinePointerInfo::getFixedStack(MF, FI)));
19103 }
19104 ArgValue = SpillSlot;
19105 } else {
19106 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
19107 }
19108
19109 // Use local copy if it is a byval arg.
19110 if (Flags.isByVal())
19111 ArgValue = ByValArgs[j++];
19112
19113 if (VA.isRegLoc()) {
19114 // Queue up the argument copies and emit them at the end.
19115 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
19116 } else {
19117 assert(VA.isMemLoc() && "Argument not register or memory");
19118 assert(!IsTailCall && "Tail call not allowed if stack is used "
19119 "for passing parameters");
19120
19121 // Work out the address of the stack slot.
19122 if (!StackPtr.getNode())
19123 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
19124 SDValue Address =
19125 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
19126 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
19127
19128 // Emit the store.
19129 MemOpChains.push_back(
19130 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
19131 }
19132 }
19133
19134 // Join the stores, which are independent of one another.
19135 if (!MemOpChains.empty())
19136 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
19137
19138 SDValue Glue;
19139
19140 // Build a sequence of copy-to-reg nodes, chained and glued together.
19141 for (auto &Reg : RegsToPass) {
19142 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
19143 Glue = Chain.getValue(1);
19144 }
19145
19146 // Validate that none of the argument registers have been marked as
19147 // reserved; if so, report an error. Do the same for the return address if
19148 // this is not a tail call.
19149 validateCCReservedRegs(RegsToPass, MF);
19150 if (!IsTailCall &&
19151 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
19152 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19153 MF.getFunction(),
19154 "Return address register required, but has been reserved."});
19155
19156 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
19157 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
19158 // split it and then direct call can be matched by PseudoCALL.
19159 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
19160 const GlobalValue *GV = S->getGlobal();
19161 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
19162 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
19163 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
19164 }
19165
19166 // The first call operand is the chain and the second is the target address.
19167 SmallVector<SDValue, 8> Ops;
19168 Ops.push_back(Chain);
19169 Ops.push_back(Callee);
19170
19171 // Add argument registers to the end of the list so that they are
19172 // known live into the call.
19173 for (auto &Reg : RegsToPass)
19174 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
19175
19176 if (!IsTailCall) {
19177 // Add a register mask operand representing the call-preserved registers.
19178 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
19179 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
19180 assert(Mask && "Missing call preserved mask for calling convention");
19181 Ops.push_back(DAG.getRegisterMask(Mask));
19182 }
19183
19184 // Glue the call to the argument copies, if any.
19185 if (Glue.getNode())
19186 Ops.push_back(Glue);
19187
19188 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
19189 "Unexpected CFI type for a direct call");
19190
19191 // Emit the call.
19192 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
19193
19194 if (IsTailCall) {
19195 MF.getFrameInfo().setHasTailCall();
19196 SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
19197 if (CLI.CFIType)
19198 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19199 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
19200 return Ret;
19201 }
19202
19203 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
19204 if (CLI.CFIType)
19205 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
19206 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
19207 Glue = Chain.getValue(1);
19208
19209 // Mark the end of the call, which is glued to the call itself.
19210 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
19211 Glue = Chain.getValue(1);
19212
19213 // Assign locations to each value returned by this call.
19214 SmallVector<CCValAssign, 16> RVLocs;
19215 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
19216 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
19217
19218 // Copy all of the result registers out of their specified physreg.
19219 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
19220 auto &VA = RVLocs[i];
19221 // Copy the value out
19222 SDValue RetValue =
19223 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
19224 // Glue the RetValue to the end of the call sequence
19225 Chain = RetValue.getValue(1);
19226 Glue = RetValue.getValue(2);
19227
19228 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19229 assert(VA.needsCustom());
19230 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
19231 MVT::i32, Glue);
19232 Chain = RetValue2.getValue(1);
19233 Glue = RetValue2.getValue(2);
19234 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
19235 RetValue2);
19236 }
19237
19238 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
19239
19240 InVals.push_back(RetValue);
19241 }
19242
19243 return Chain;
19244}
19245
19246bool RISCVTargetLowering::CanLowerReturn(
19247 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
19248 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
19249 SmallVector<CCValAssign, 16> RVLocs;
19250 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
19251
19252 std::optional<unsigned> FirstMaskArgument;
19253 if (Subtarget.hasVInstructions())
19254 FirstMaskArgument = preAssignMask(Outs);
19255
19256 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
19257 MVT VT = Outs[i].VT;
19258 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19259 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
19260 if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
19261 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
19262 *this, FirstMaskArgument))
19263 return false;
19264 }
19265 return true;
19266}
19267
19268SDValue
19269RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
19270 bool IsVarArg,
19271 const SmallVectorImpl<ISD::OutputArg> &Outs,
19272 const SmallVectorImpl<SDValue> &OutVals,
19273 const SDLoc &DL, SelectionDAG &DAG) const {
19274 MachineFunction &MF = DAG.getMachineFunction();
19275 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19276
19277 // Stores the assignment of the return value to a location.
19278 SmallVector<CCValAssign, 16> RVLocs;
19279
19280 // Info about the registers and stack slot.
19281 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
19282 *DAG.getContext());
19283
19284 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
19285 nullptr, RISCV::CC_RISCV);
19286
19287 if (CallConv == CallingConv::GHC && !RVLocs.empty())
19288 report_fatal_error("GHC functions return void only");
19289
19290 SDValue Glue;
19291 SmallVector<SDValue, 4> RetOps(1, Chain);
19292
19293 // Copy the result values into the output registers.
19294 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
19295 SDValue Val = OutVals[OutIdx];
19296 CCValAssign &VA = RVLocs[i];
19297 assert(VA.isRegLoc() && "Can only return in registers!");
19298
19299 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19300 // Handle returning f64 on RV32D with a soft float ABI.
19301 assert(VA.isRegLoc() && "Expected return via registers");
19302 assert(VA.needsCustom());
19303 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
19304 DAG.getVTList(MVT::i32, MVT::i32), Val);
19305 SDValue Lo = SplitF64.getValue(0);
19306 SDValue Hi = SplitF64.getValue(1);
19307 Register RegLo = VA.getLocReg();
19308 Register RegHi = RVLocs[++i].getLocReg();
19309
19310 if (STI.isRegisterReservedByUser(RegLo) ||
19311 STI.isRegisterReservedByUser(RegHi))
19312 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19313 MF.getFunction(),
19314 "Return value register required, but has been reserved."});
19315
19316 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
19317 Glue = Chain.getValue(1);
19318 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
19319 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
19320 Glue = Chain.getValue(1);
19321 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
19322 } else {
19323 // Handle a 'normal' return.
19324 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
19325 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
19326
19327 if (STI.isRegisterReservedByUser(VA.getLocReg()))
19328 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
19329 MF.getFunction(),
19330 "Return value register required, but has been reserved."});
19331
19332 // Guarantee that all emitted copies are stuck together.
19333 Glue = Chain.getValue(1);
19334 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
19335 }
19336 }
19337
19338 RetOps[0] = Chain; // Update chain.
19339
19340 // Add the glue node if we have it.
19341 if (Glue.getNode()) {
19342 RetOps.push_back(Glue);
19343 }
19344
19345 if (any_of(RVLocs,
19346 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19347 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19348
19349 unsigned RetOpc = RISCVISD::RET_GLUE;
19350 // Interrupt service routines use different return instructions.
19351 const Function &Func = DAG.getMachineFunction().getFunction();
19352 if (Func.hasFnAttribute("interrupt")) {
19353 if (!Func.getReturnType()->isVoidTy())
19354 report_fatal_error(
19355 "Functions with the interrupt attribute must have void return type!");
19356
19357 MachineFunction &MF = DAG.getMachineFunction();
19358 StringRef Kind =
19359 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19360
19361 if (Kind == "supervisor")
19362 RetOpc = RISCVISD::SRET_GLUE;
19363 else
19364 RetOpc = RISCVISD::MRET_GLUE;
19365 }
19366
19367 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
19368}
19369
19370void RISCVTargetLowering::validateCCReservedRegs(
19371 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
19372 MachineFunction &MF) const {
19373 const Function &F = MF.getFunction();
19374 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
19375
19376 if (llvm::any_of(Regs, [&STI](auto Reg) {
19377 return STI.isRegisterReservedByUser(Reg.first);
19378 }))
19379 F.getContext().diagnose(DiagnosticInfoUnsupported{
19380 F, "Argument register required, but has been reserved."});
19381}
19382
19383// Check if the result of the node is only used as a return value, as
19384// otherwise we can't perform a tail-call.
19385bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
19386 if (N->getNumValues() != 1)
19387 return false;
19388 if (!N->hasNUsesOfValue(1, 0))
19389 return false;
19390
19391 SDNode *Copy = *N->use_begin();
19392
19393 if (Copy->getOpcode() == ISD::BITCAST) {
19394 return isUsedByReturnOnly(Copy, Chain);
19395 }
19396
19397 // TODO: Handle additional opcodes in order to support tail-calling libcalls
19398 // with soft float ABIs.
19399 if (Copy->getOpcode() != ISD::CopyToReg) {
19400 return false;
19401 }
19402
19403 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
19404 // isn't safe to perform a tail call.
19405 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
19406 return false;
19407
19408 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
19409 bool HasRet = false;
19410 for (SDNode *Node : Copy->uses()) {
19411 if (Node->getOpcode() != RISCVISD::RET_GLUE)
19412 return false;
19413 HasRet = true;
19414 }
19415 if (!HasRet)
19416 return false;
19417
19418 Chain = Copy->getOperand(0);
19419 return true;
19420}
19421
19422bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19423 return CI->isTailCall();
19424}
19425
19426const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
19427#define NODE_NAME_CASE(NODE) \
19428 case RISCVISD::NODE: \
19429 return "RISCVISD::" #NODE;
19430 // clang-format off
19431 switch ((RISCVISD::NodeType)Opcode) {
19432 case RISCVISD::FIRST_NUMBER:
19433 break;
19434 NODE_NAME_CASE(RET_GLUE)
19435 NODE_NAME_CASE(SRET_GLUE)
19436 NODE_NAME_CASE(MRET_GLUE)
19437 NODE_NAME_CASE(CALL)
19438 NODE_NAME_CASE(SELECT_CC)
19439 NODE_NAME_CASE(BR_CC)
19440 NODE_NAME_CASE(BuildPairF64)
19441 NODE_NAME_CASE(SplitF64)
19442 NODE_NAME_CASE(TAIL)
19443 NODE_NAME_CASE(ADD_LO)
19444 NODE_NAME_CASE(HI)
19445 NODE_NAME_CASE(LLA)
19446 NODE_NAME_CASE(ADD_TPREL)
19447 NODE_NAME_CASE(MULHSU)
19448 NODE_NAME_CASE(SLLW)
19449 NODE_NAME_CASE(SRAW)
19450 NODE_NAME_CASE(SRLW)
19451 NODE_NAME_CASE(DIVW)
19452 NODE_NAME_CASE(DIVUW)
19453 NODE_NAME_CASE(REMUW)
19454 NODE_NAME_CASE(ROLW)
19455 NODE_NAME_CASE(RORW)
19456 NODE_NAME_CASE(CLZW)
19457 NODE_NAME_CASE(CTZW)
19458 NODE_NAME_CASE(ABSW)
19459 NODE_NAME_CASE(FMV_H_X)
19460 NODE_NAME_CASE(FMV_X_ANYEXTH)
19461 NODE_NAME_CASE(FMV_X_SIGNEXTH)
19462 NODE_NAME_CASE(FMV_W_X_RV64)
19463 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
19464 NODE_NAME_CASE(FCVT_X)
19465 NODE_NAME_CASE(FCVT_XU)
19466 NODE_NAME_CASE(FCVT_W_RV64)
19467 NODE_NAME_CASE(FCVT_WU_RV64)
19468 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
19469 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
19470 NODE_NAME_CASE(FP_ROUND_BF16)
19471 NODE_NAME_CASE(FP_EXTEND_BF16)
19472 NODE_NAME_CASE(FROUND)
19473 NODE_NAME_CASE(FCLASS)
19474 NODE_NAME_CASE(FMAX)
19475 NODE_NAME_CASE(FMIN)
19476 NODE_NAME_CASE(READ_COUNTER_WIDE)
19477 NODE_NAME_CASE(BREV8)
19478 NODE_NAME_CASE(ORC_B)
19479 NODE_NAME_CASE(ZIP)
19480 NODE_NAME_CASE(UNZIP)
19481 NODE_NAME_CASE(CLMUL)
19482 NODE_NAME_CASE(CLMULH)
19483 NODE_NAME_CASE(CLMULR)
19484 NODE_NAME_CASE(MOPR)
19485 NODE_NAME_CASE(MOPRR)
19486 NODE_NAME_CASE(SHA256SIG0)
19487 NODE_NAME_CASE(SHA256SIG1)
19488 NODE_NAME_CASE(SHA256SUM0)
19489 NODE_NAME_CASE(SHA256SUM1)
19490 NODE_NAME_CASE(SM4KS)
19491 NODE_NAME_CASE(SM4ED)
19492 NODE_NAME_CASE(SM3P0)
19493 NODE_NAME_CASE(SM3P1)
19494 NODE_NAME_CASE(TH_LWD)
19495 NODE_NAME_CASE(TH_LWUD)
19496 NODE_NAME_CASE(TH_LDD)
19497 NODE_NAME_CASE(TH_SWD)
19498 NODE_NAME_CASE(TH_SDD)
19499 NODE_NAME_CASE(VMV_V_V_VL)
19500 NODE_NAME_CASE(VMV_V_X_VL)
19501 NODE_NAME_CASE(VFMV_V_F_VL)
19502 NODE_NAME_CASE(VMV_X_S)
19503 NODE_NAME_CASE(VMV_S_X_VL)
19504 NODE_NAME_CASE(VFMV_S_F_VL)
19505 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
19506 NODE_NAME_CASE(READ_VLENB)
19507 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
19508 NODE_NAME_CASE(VSLIDEUP_VL)
19509 NODE_NAME_CASE(VSLIDE1UP_VL)
19510 NODE_NAME_CASE(VSLIDEDOWN_VL)
19511 NODE_NAME_CASE(VSLIDE1DOWN_VL)
19512 NODE_NAME_CASE(VFSLIDE1UP_VL)
19513 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
19514 NODE_NAME_CASE(VID_VL)
19515 NODE_NAME_CASE(VFNCVT_ROD_VL)
19516 NODE_NAME_CASE(VECREDUCE_ADD_VL)
19517 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
19518 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
19519 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
19520 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
19521 NODE_NAME_CASE(VECREDUCE_AND_VL)
19522 NODE_NAME_CASE(VECREDUCE_OR_VL)
19523 NODE_NAME_CASE(VECREDUCE_XOR_VL)
19524 NODE_NAME_CASE(VECREDUCE_FADD_VL)
19525 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
19526 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
19527 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
19528 NODE_NAME_CASE(ADD_VL)
19529 NODE_NAME_CASE(AND_VL)
19530 NODE_NAME_CASE(MUL_VL)
19531 NODE_NAME_CASE(OR_VL)
19532 NODE_NAME_CASE(SDIV_VL)
19533 NODE_NAME_CASE(SHL_VL)
19534 NODE_NAME_CASE(SREM_VL)
19535 NODE_NAME_CASE(SRA_VL)
19536 NODE_NAME_CASE(SRL_VL)
19537 NODE_NAME_CASE(ROTL_VL)
19538 NODE_NAME_CASE(ROTR_VL)
19539 NODE_NAME_CASE(SUB_VL)
19540 NODE_NAME_CASE(UDIV_VL)
19541 NODE_NAME_CASE(UREM_VL)
19542 NODE_NAME_CASE(XOR_VL)
19543 NODE_NAME_CASE(AVGFLOORU_VL)
19544 NODE_NAME_CASE(AVGCEILU_VL)
19545 NODE_NAME_CASE(SADDSAT_VL)
19546 NODE_NAME_CASE(UADDSAT_VL)
19547 NODE_NAME_CASE(SSUBSAT_VL)
19548 NODE_NAME_CASE(USUBSAT_VL)
19549 NODE_NAME_CASE(FADD_VL)
19550 NODE_NAME_CASE(FSUB_VL)
19551 NODE_NAME_CASE(FMUL_VL)
19552 NODE_NAME_CASE(FDIV_VL)
19553 NODE_NAME_CASE(FNEG_VL)
19554 NODE_NAME_CASE(FABS_VL)
19555 NODE_NAME_CASE(FSQRT_VL)
19556 NODE_NAME_CASE(FCLASS_VL)
19557 NODE_NAME_CASE(VFMADD_VL)
19558 NODE_NAME_CASE(VFNMADD_VL)
19559 NODE_NAME_CASE(VFMSUB_VL)
19560 NODE_NAME_CASE(VFNMSUB_VL)
19561 NODE_NAME_CASE(VFWMADD_VL)
19562 NODE_NAME_CASE(VFWNMADD_VL)
19563 NODE_NAME_CASE(VFWMSUB_VL)
19564 NODE_NAME_CASE(VFWNMSUB_VL)
19565 NODE_NAME_CASE(FCOPYSIGN_VL)
19566 NODE_NAME_CASE(SMIN_VL)
19567 NODE_NAME_CASE(SMAX_VL)
19568 NODE_NAME_CASE(UMIN_VL)
19569 NODE_NAME_CASE(UMAX_VL)
19570 NODE_NAME_CASE(BITREVERSE_VL)
19571 NODE_NAME_CASE(BSWAP_VL)
19572 NODE_NAME_CASE(CTLZ_VL)
19573 NODE_NAME_CASE(CTTZ_VL)
19574 NODE_NAME_CASE(CTPOP_VL)
19575 NODE_NAME_CASE(VFMIN_VL)
19576 NODE_NAME_CASE(VFMAX_VL)
19577 NODE_NAME_CASE(MULHS_VL)
19578 NODE_NAME_CASE(MULHU_VL)
19579 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
19580 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
19581 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
19582 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
19583 NODE_NAME_CASE(VFCVT_X_F_VL)
19584 NODE_NAME_CASE(VFCVT_XU_F_VL)
19585 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
19586 NODE_NAME_CASE(SINT_TO_FP_VL)
19587 NODE_NAME_CASE(UINT_TO_FP_VL)
19588 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
19589 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
19590 NODE_NAME_CASE(FP_EXTEND_VL)
19591 NODE_NAME_CASE(FP_ROUND_VL)
19592 NODE_NAME_CASE(STRICT_FADD_VL)
19593 NODE_NAME_CASE(STRICT_FSUB_VL)
19594 NODE_NAME_CASE(STRICT_FMUL_VL)
19595 NODE_NAME_CASE(STRICT_FDIV_VL)
19596 NODE_NAME_CASE(STRICT_FSQRT_VL)
19597 NODE_NAME_CASE(STRICT_VFMADD_VL)
19598 NODE_NAME_CASE(STRICT_VFNMADD_VL)
19599 NODE_NAME_CASE(STRICT_VFMSUB_VL)
19600 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
19601 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
19602 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
19603 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
19604 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
19605 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
19606 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
19607 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
19608 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
19609 NODE_NAME_CASE(STRICT_FSETCC_VL)
19610 NODE_NAME_CASE(STRICT_FSETCCS_VL)
19611 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
19612 NODE_NAME_CASE(VWMUL_VL)
19613 NODE_NAME_CASE(VWMULU_VL)
19614 NODE_NAME_CASE(VWMULSU_VL)
19615 NODE_NAME_CASE(VWADD_VL)
19616 NODE_NAME_CASE(VWADDU_VL)
19617 NODE_NAME_CASE(VWSUB_VL)
19618 NODE_NAME_CASE(VWSUBU_VL)
19619 NODE_NAME_CASE(VWADD_W_VL)
19620 NODE_NAME_CASE(VWADDU_W_VL)
19621 NODE_NAME_CASE(VWSUB_W_VL)
19622 NODE_NAME_CASE(VWSUBU_W_VL)
19623 NODE_NAME_CASE(VWSLL_VL)
19624 NODE_NAME_CASE(VFWMUL_VL)
19625 NODE_NAME_CASE(VFWADD_VL)
19626 NODE_NAME_CASE(VFWSUB_VL)
19627 NODE_NAME_CASE(VFWADD_W_VL)
19628 NODE_NAME_CASE(VFWSUB_W_VL)
19629 NODE_NAME_CASE(VWMACC_VL)
19630 NODE_NAME_CASE(VWMACCU_VL)
19631 NODE_NAME_CASE(VWMACCSU_VL)
19632 NODE_NAME_CASE(VNSRL_VL)
19633 NODE_NAME_CASE(SETCC_VL)
19634 NODE_NAME_CASE(VMERGE_VL)
19635 NODE_NAME_CASE(VMAND_VL)
19636 NODE_NAME_CASE(VMOR_VL)
19637 NODE_NAME_CASE(VMXOR_VL)
19638 NODE_NAME_CASE(VMCLR_VL)
19639 NODE_NAME_CASE(VMSET_VL)
19640 NODE_NAME_CASE(VRGATHER_VX_VL)
19641 NODE_NAME_CASE(VRGATHER_VV_VL)
19642 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
19643 NODE_NAME_CASE(VSEXT_VL)
19644 NODE_NAME_CASE(VZEXT_VL)
19645 NODE_NAME_CASE(VCPOP_VL)
19646 NODE_NAME_CASE(VFIRST_VL)
19647 NODE_NAME_CASE(READ_CSR)
19648 NODE_NAME_CASE(WRITE_CSR)
19649 NODE_NAME_CASE(SWAP_CSR)
19650 NODE_NAME_CASE(CZERO_EQZ)
19651 NODE_NAME_CASE(CZERO_NEZ)
19652 NODE_NAME_CASE(SF_VC_XV_SE)
19653 NODE_NAME_CASE(SF_VC_IV_SE)
19654 NODE_NAME_CASE(SF_VC_VV_SE)
19655 NODE_NAME_CASE(SF_VC_FV_SE)
19656 NODE_NAME_CASE(SF_VC_XVV_SE)
19657 NODE_NAME_CASE(SF_VC_IVV_SE)
19658 NODE_NAME_CASE(SF_VC_VVV_SE)
19659 NODE_NAME_CASE(SF_VC_FVV_SE)
19660 NODE_NAME_CASE(SF_VC_XVW_SE)
19661 NODE_NAME_CASE(SF_VC_IVW_SE)
19662 NODE_NAME_CASE(SF_VC_VVW_SE)
19663 NODE_NAME_CASE(SF_VC_FVW_SE)
19664 NODE_NAME_CASE(SF_VC_V_X_SE)
19665 NODE_NAME_CASE(SF_VC_V_I_SE)
19666 NODE_NAME_CASE(SF_VC_V_XV_SE)
19667 NODE_NAME_CASE(SF_VC_V_IV_SE)
19668 NODE_NAME_CASE(SF_VC_V_VV_SE)
19669 NODE_NAME_CASE(SF_VC_V_FV_SE)
19670 NODE_NAME_CASE(SF_VC_V_XVV_SE)
19671 NODE_NAME_CASE(SF_VC_V_IVV_SE)
19672 NODE_NAME_CASE(SF_VC_V_VVV_SE)
19673 NODE_NAME_CASE(SF_VC_V_FVV_SE)
19674 NODE_NAME_CASE(SF_VC_V_XVW_SE)
19675 NODE_NAME_CASE(SF_VC_V_IVW_SE)
19676 NODE_NAME_CASE(SF_VC_V_VVW_SE)
19677 NODE_NAME_CASE(SF_VC_V_FVW_SE)
19678 }
19679 // clang-format on
19680 return nullptr;
19681#undef NODE_NAME_CASE
19682}
19683
19684/// getConstraintType - Given a constraint letter, return the type of
19685/// constraint it is for this target.
19686RISCVTargetLowering::ConstraintType
19687RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
19688 if (Constraint.size() == 1) {
19689 switch (Constraint[0]) {
19690 default:
19691 break;
19692 case 'f':
19693 return C_RegisterClass;
19694 case 'I':
19695 case 'J':
19696 case 'K':
19697 return C_Immediate;
19698 case 'A':
19699 return C_Memory;
19700 case 's':
19701 case 'S': // A symbolic address
19702 return C_Other;
19703 }
19704 } else {
19705 if (Constraint == "vr" || Constraint == "vm")
19706 return C_RegisterClass;
19707 }
19708 return TargetLowering::getConstraintType(Constraint);
19709}
19710
19711std::pair<unsigned, const TargetRegisterClass *>
19712RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
19713 StringRef Constraint,
19714 MVT VT) const {
19715 // First, see if this is a constraint that directly corresponds to a RISC-V
19716 // register class.
19717 if (Constraint.size() == 1) {
19718 switch (Constraint[0]) {
19719 case 'r':
19720 // TODO: Support fixed vectors up to XLen for P extension?
19721 if (VT.isVector())
19722 break;
19723 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
19724 return std::make_pair(0U, &RISCV::GPRF16RegClass);
19725 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
19726 return std::make_pair(0U, &RISCV::GPRF32RegClass);
19727 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
19728 return std::make_pair(0U, &RISCV::GPRPairRegClass);
19729 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
19730 case 'f':
19731 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
19732 return std::make_pair(0U, &RISCV::FPR16RegClass);
19733 if (Subtarget.hasStdExtF() && VT == MVT::f32)
19734 return std::make_pair(0U, &RISCV::FPR32RegClass);
19735 if (Subtarget.hasStdExtD() && VT == MVT::f64)
19736 return std::make_pair(0U, &RISCV::FPR64RegClass);
19737 break;
19738 default:
19739 break;
19740 }
19741 } else if (Constraint == "vr") {
19742 for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
19743 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
19744 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
19745 return std::make_pair(0U, RC);
19746 }
19747 } else if (Constraint == "vm") {
19748 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
19749 return std::make_pair(0U, &RISCV::VMV0RegClass);
19750 }
19751
19752 // Clang will correctly decode the usage of register name aliases into their
19753 // official names. However, other frontends like `rustc` do not. This allows
19754 // users of these frontends to use the ABI names for registers in LLVM-style
19755 // register constraints.
19756 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
19757 .Case("{zero}", RISCV::X0)
19758 .Case("{ra}", RISCV::X1)
19759 .Case("{sp}", RISCV::X2)
19760 .Case("{gp}", RISCV::X3)
19761 .Case("{tp}", RISCV::X4)
19762 .Case("{t0}", RISCV::X5)
19763 .Case("{t1}", RISCV::X6)
19764 .Case("{t2}", RISCV::X7)
19765 .Cases("{s0}", "{fp}", RISCV::X8)
19766 .Case("{s1}", RISCV::X9)
19767 .Case("{a0}", RISCV::X10)
19768 .Case("{a1}", RISCV::X11)
19769 .Case("{a2}", RISCV::X12)
19770 .Case("{a3}", RISCV::X13)
19771 .Case("{a4}", RISCV::X14)
19772 .Case("{a5}", RISCV::X15)
19773 .Case("{a6}", RISCV::X16)
19774 .Case("{a7}", RISCV::X17)
19775 .Case("{s2}", RISCV::X18)
19776 .Case("{s3}", RISCV::X19)
19777 .Case("{s4}", RISCV::X20)
19778 .Case("{s5}", RISCV::X21)
19779 .Case("{s6}", RISCV::X22)
19780 .Case("{s7}", RISCV::X23)
19781 .Case("{s8}", RISCV::X24)
19782 .Case("{s9}", RISCV::X25)
19783 .Case("{s10}", RISCV::X26)
19784 .Case("{s11}", RISCV::X27)
19785 .Case("{t3}", RISCV::X28)
19786 .Case("{t4}", RISCV::X29)
19787 .Case("{t5}", RISCV::X30)
19788 .Case("{t6}", RISCV::X31)
19789 .Default(RISCV::NoRegister);
19790 if (XRegFromAlias != RISCV::NoRegister)
19791 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
19792
19793 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
19794 // TableGen record rather than the AsmName to choose registers for InlineAsm
19795 // constraints, plus we want to match those names to the widest floating point
19796 // register type available, manually select floating point registers here.
19797 //
19798 // The second case is the ABI name of the register, so that frontends can also
19799 // use the ABI names in register constraint lists.
19800 if (Subtarget.hasStdExtF()) {
19801 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
19802 .Cases("{f0}", "{ft0}", RISCV::F0_F)
19803 .Cases("{f1}", "{ft1}", RISCV::F1_F)
19804 .Cases("{f2}", "{ft2}", RISCV::F2_F)
19805 .Cases("{f3}", "{ft3}", RISCV::F3_F)
19806 .Cases("{f4}", "{ft4}", RISCV::F4_F)
19807 .Cases("{f5}", "{ft5}", RISCV::F5_F)
19808 .Cases("{f6}", "{ft6}", RISCV::F6_F)
19809 .Cases("{f7}", "{ft7}", RISCV::F7_F)
19810 .Cases("{f8}", "{fs0}", RISCV::F8_F)
19811 .Cases("{f9}", "{fs1}", RISCV::F9_F)
19812 .Cases("{f10}", "{fa0}", RISCV::F10_F)
19813 .Cases("{f11}", "{fa1}", RISCV::F11_F)
19814 .Cases("{f12}", "{fa2}", RISCV::F12_F)
19815 .Cases("{f13}", "{fa3}", RISCV::F13_F)
19816 .Cases("{f14}", "{fa4}", RISCV::F14_F)
19817 .Cases("{f15}", "{fa5}", RISCV::F15_F)
19818 .Cases("{f16}", "{fa6}", RISCV::F16_F)
19819 .Cases("{f17}", "{fa7}", RISCV::F17_F)
19820 .Cases("{f18}", "{fs2}", RISCV::F18_F)
19821 .Cases("{f19}", "{fs3}", RISCV::F19_F)
19822 .Cases("{f20}", "{fs4}", RISCV::F20_F)
19823 .Cases("{f21}", "{fs5}", RISCV::F21_F)
19824 .Cases("{f22}", "{fs6}", RISCV::F22_F)
19825 .Cases("{f23}", "{fs7}", RISCV::F23_F)
19826 .Cases("{f24}", "{fs8}", RISCV::F24_F)
19827 .Cases("{f25}", "{fs9}", RISCV::F25_F)
19828 .Cases("{f26}", "{fs10}", RISCV::F26_F)
19829 .Cases("{f27}", "{fs11}", RISCV::F27_F)
19830 .Cases("{f28}", "{ft8}", RISCV::F28_F)
19831 .Cases("{f29}", "{ft9}", RISCV::F29_F)
19832 .Cases("{f30}", "{ft10}", RISCV::F30_F)
19833 .Cases("{f31}", "{ft11}", RISCV::F31_F)
19834 .Default(RISCV::NoRegister);
19835 if (FReg != RISCV::NoRegister) {
19836 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
19837 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
19838 unsigned RegNo = FReg - RISCV::F0_F;
19839 unsigned DReg = RISCV::F0_D + RegNo;
19840 return std::make_pair(DReg, &RISCV::FPR64RegClass);
19841 }
19842 if (VT == MVT::f32 || VT == MVT::Other)
19843 return std::make_pair(FReg, &RISCV::FPR32RegClass);
19844 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
19845 unsigned RegNo = FReg - RISCV::F0_F;
19846 unsigned HReg = RISCV::F0_H + RegNo;
19847 return std::make_pair(HReg, &RISCV::FPR16RegClass);
19848 }
19849 }
19850 }
19851
19852 if (Subtarget.hasVInstructions()) {
19853 Register VReg = StringSwitch<Register>(Constraint.lower())
19854 .Case("{v0}", RISCV::V0)
19855 .Case("{v1}", RISCV::V1)
19856 .Case("{v2}", RISCV::V2)
19857 .Case("{v3}", RISCV::V3)
19858 .Case("{v4}", RISCV::V4)
19859 .Case("{v5}", RISCV::V5)
19860 .Case("{v6}", RISCV::V6)
19861 .Case("{v7}", RISCV::V7)
19862 .Case("{v8}", RISCV::V8)
19863 .Case("{v9}", RISCV::V9)
19864 .Case("{v10}", RISCV::V10)
19865 .Case("{v11}", RISCV::V11)
19866 .Case("{v12}", RISCV::V12)
19867 .Case("{v13}", RISCV::V13)
19868 .Case("{v14}", RISCV::V14)
19869 .Case("{v15}", RISCV::V15)
19870 .Case("{v16}", RISCV::V16)
19871 .Case("{v17}", RISCV::V17)
19872 .Case("{v18}", RISCV::V18)
19873 .Case("{v19}", RISCV::V19)
19874 .Case("{v20}", RISCV::V20)
19875 .Case("{v21}", RISCV::V21)
19876 .Case("{v22}", RISCV::V22)
19877 .Case("{v23}", RISCV::V23)
19878 .Case("{v24}", RISCV::V24)
19879 .Case("{v25}", RISCV::V25)
19880 .Case("{v26}", RISCV::V26)
19881 .Case("{v27}", RISCV::V27)
19882 .Case("{v28}", RISCV::V28)
19883 .Case("{v29}", RISCV::V29)
19884 .Case("{v30}", RISCV::V30)
19885 .Case("{v31}", RISCV::V31)
19886 .Default(RISCV::NoRegister);
19887 if (VReg != RISCV::NoRegister) {
19888 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
19889 return std::make_pair(VReg, &RISCV::VMRegClass);
19890 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
19891 return std::make_pair(VReg, &RISCV::VRRegClass);
19892 for (const auto *RC :
19893 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
19894 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
19895 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
19896 return std::make_pair(VReg, RC);
19897 }
19898 }
19899 }
19900 }
19901
19902 std::pair<Register, const TargetRegisterClass *> Res =
19903 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
19904
19905 // If we picked one of the Zfinx register classes, remap it to the GPR class.
19906 // FIXME: When Zfinx is supported in CodeGen this will need to take the
19907 // Subtarget into account.
19908 if (Res.second == &RISCV::GPRF16RegClass ||
19909 Res.second == &RISCV::GPRF32RegClass ||
19910 Res.second == &RISCV::GPRPairRegClass)
19911 return std::make_pair(Res.first, &RISCV::GPRRegClass);
19912
19913 return Res;
19914}
19915
19916InlineAsm::ConstraintCode
19917RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
19918 // Currently only support length 1 constraints.
19919 if (ConstraintCode.size() == 1) {
19920 switch (ConstraintCode[0]) {
19921 case 'A':
19922 return InlineAsm::ConstraintCode::A;
19923 default:
19924 break;
19925 }
19926 }
19927
19928 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
19929}
19930
19931void RISCVTargetLowering::LowerAsmOperandForConstraint(
19932 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
19933 SelectionDAG &DAG) const {
19934 // Currently only support length 1 constraints.
19935 if (Constraint.size() == 1) {
19936 switch (Constraint[0]) {
19937 case 'I':
19938 // Validate & create a 12-bit signed immediate operand.
19939 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
19940 uint64_t CVal = C->getSExtValue();
19941 if (isInt<12>(CVal))
19942 Ops.push_back(
19943 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
19944 }
19945 return;
19946 case 'J':
19947 // Validate & create an integer zero operand.
19948 if (isNullConstant(Op))
19949 Ops.push_back(
19950 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
19951 return;
19952 case 'K':
19953 // Validate & create a 5-bit unsigned immediate operand.
19954 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
19955 uint64_t CVal = C->getZExtValue();
19956 if (isUInt<5>(CVal))
19957 Ops.push_back(
19958 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
19959 }
19960 return;
19961 case 'S':
19962 TargetLowering::LowerAsmOperandForConstraint(Op, "s", Ops, DAG);
19963 return;
19964 default:
19965 break;
19966 }
19967 }
19968 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
19969}
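// Illustrative C-level uses of the immediate constraints handled above
// (a sketch, not exercised by this file):
//   int out, in;
//   __asm__ volatile("addi %0, %1, %2" : "=r"(out) : "r"(in), "I"(42)); // simm12
//   __asm__ volatile("addi %0, %1, %2" : "=r"(out) : "r"(in), "J"(0));  // zero
// A constant outside the constraint's range produces no operand here and is
// rejected by the generic constraint handling.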
19970
19971Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
19972 Instruction *Inst,
19973 AtomicOrdering Ord) const {
19974 if (Subtarget.hasStdExtZtso()) {
19975 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
19976 return Builder.CreateFence(Ord);
19977 return nullptr;
19978 }
19979
19980 if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
19981 return Builder.CreateFence(Ord);
19982 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
19983 return Builder.CreateFence(AtomicOrdering::Release);
19984 return nullptr;
19985}
19986
19987Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
19988 Instruction *Inst,
19989 AtomicOrdering Ord) const {
19990 if (Subtarget.hasStdExtZtso()) {
19991 if (isa<StoreInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
19992 return Builder.CreateFence(Ord);
19993 return nullptr;
19994 }
19995
19996 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
19997 return Builder.CreateFence(AtomicOrdering::Acquire);
19998 if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
19999 Ord == AtomicOrdering::SequentiallyConsistent)
20000 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
20001 return nullptr;
20002}
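// For illustration (a sketch of the resulting mapping, not part of this file):
// without Ztso, a C++ store such as
//   std::atomic<int> x;  x.store(1, std::memory_order_seq_cst);
// gets a leading release fence from emitLeadingFence and, only when
// enableSeqCstTrailingFence() is set, an additional trailing seq_cst fence
// from the code above; with Ztso only a seq_cst load receives a leading fence
// and only a seq_cst store receives a trailing one.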
20003
20004TargetLowering::AtomicExpansionKind
20005RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20006 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
20007 // point operations can't be used in an lr/sc sequence without breaking the
20008 // forward-progress guarantee.
20009 if (AI->isFloatingPointOperation() ||
20010 AI->getOperation() == AtomicRMWInst::UIncWrap ||
20011 AI->getOperation() == AtomicRMWInst::UDecWrap)
20012 return AtomicExpansionKind::CmpXChg;
20013
20014 // Don't expand forced atomics, we want to have __sync libcalls instead.
20015 if (Subtarget.hasForcedAtomics())
20016 return AtomicExpansionKind::None;
20017
20018 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20019 if (AI->getOperation() == AtomicRMWInst::Nand) {
20020 if (Subtarget.hasStdExtZacas() &&
20021 (Size >= 32 || Subtarget.hasStdExtZabha()))
20022 return AtomicExpansionKind::CmpXChg;
20023 if (Size < 32)
20024 return AtomicExpansionKind::MaskedIntrinsic;
20025 }
20026
20027 if (Size < 32 && !Subtarget.hasStdExtZabha())
20028 return AtomicExpansionKind::MaskedIntrinsic;
20029
20030 return AtomicExpansionKind::None;
20031}
20032
20033static Intrinsic::ID
20034getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
20035 if (XLen == 32) {
20036 switch (BinOp) {
20037 default:
20038 llvm_unreachable("Unexpected AtomicRMW BinOp");
20039 case AtomicRMWInst::Xchg:
20040 return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
20041 case AtomicRMWInst::Add:
20042 return Intrinsic::riscv_masked_atomicrmw_add_i32;
20043 case AtomicRMWInst::Sub:
20044 return Intrinsic::riscv_masked_atomicrmw_sub_i32;
20045 case AtomicRMWInst::Nand:
20046 return Intrinsic::riscv_masked_atomicrmw_nand_i32;
20047 case AtomicRMWInst::Max:
20048 return Intrinsic::riscv_masked_atomicrmw_max_i32;
20049 case AtomicRMWInst::Min:
20050 return Intrinsic::riscv_masked_atomicrmw_min_i32;
20051 case AtomicRMWInst::UMax:
20052 return Intrinsic::riscv_masked_atomicrmw_umax_i32;
20053 case AtomicRMWInst::UMin:
20054 return Intrinsic::riscv_masked_atomicrmw_umin_i32;
20055 }
20056 }
20057
20058 if (XLen == 64) {
20059 switch (BinOp) {
20060 default:
20061 llvm_unreachable("Unexpected AtomicRMW BinOp");
20062 case AtomicRMWInst::Xchg:
20063 return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
20064 case AtomicRMWInst::Add:
20065 return Intrinsic::riscv_masked_atomicrmw_add_i64;
20066 case AtomicRMWInst::Sub:
20067 return Intrinsic::riscv_masked_atomicrmw_sub_i64;
20068 case AtomicRMWInst::Nand:
20069 return Intrinsic::riscv_masked_atomicrmw_nand_i64;
20070 case AtomicRMWInst::Max:
20071 return Intrinsic::riscv_masked_atomicrmw_max_i64;
20072 case AtomicRMWInst::Min:
20073 return Intrinsic::riscv_masked_atomicrmw_min_i64;
20074 case AtomicRMWInst::UMax:
20075 return Intrinsic::riscv_masked_atomicrmw_umax_i64;
20076 case AtomicRMWInst::UMin:
20077 return Intrinsic::riscv_masked_atomicrmw_umin_i64;
20078 }
20079 }
20080
20081 llvm_unreachable("Unexpected XLen\n");
20082}
20083
20084Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
20085 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20086 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20087 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
20088 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
20089 // mask, as this produces better code than the LR/SC loop emitted by
20090 // int_riscv_masked_atomicrmw_xchg.
20091 if (AI->getOperation() == AtomicRMWInst::Xchg &&
20092 isa<ConstantInt>(AI->getValOperand())) {
20093 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
20094 if (CVal->isZero())
20095 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
20096 Builder.CreateNot(Mask, "Inv_Mask"),
20097 AI->getAlign(), Ord);
20098 if (CVal->isMinusOne())
20099 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
20100 AI->getAlign(), Ord);
20101 }
20102
20103 unsigned XLen = Subtarget.getXLen();
20104 Value *Ordering =
20105 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
20106 Type *Tys[] = {AlignedAddr->getType()};
20107 Function *LrwOpScwLoop = Intrinsic::getDeclaration(
20108 AI->getModule(),
20109 getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
20110
20111 if (XLen == 64) {
20112 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
20113 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20114 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
20115 }
20116
20117 Value *Result;
20118
20119 // Must pass the shift amount needed to sign extend the loaded value prior
20120 // to performing a signed comparison for min/max. ShiftAmt is the number of
20121 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
20122 // is the number of bits to left+right shift the value in order to
20123 // sign-extend.
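// Worked example (illustrative): for an i8 atomicrmw min on RV32 whose byte
// lives at offset 1 within its aligned word, ShiftAmt == 8 and ValWidth == 8,
// so SextShamt == (32 - 8) - 8 == 16; the LR/SC loop shifts the loaded field
// left by 16 and arithmetically right by 16 to sign-extend it before the
// signed comparison.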
20124 if (AI->getOperation() == AtomicRMWInst::Min ||
20125 AI->getOperation() == AtomicRMWInst::Max) {
20126 const DataLayout &DL = AI->getModule()->getDataLayout();
20127 unsigned ValWidth =
20128 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
20129 Value *SextShamt =
20130 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
20131 Result = Builder.CreateCall(LrwOpScwLoop,
20132 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
20133 } else {
20134 Result =
20135 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
20136 }
20137
20138 if (XLen == 64)
20139 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20140 return Result;
20141}
20142
20143TargetLowering::AtomicExpansionKind
20144RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
20145 AtomicCmpXchgInst *CI) const {
20146 // Don't expand forced atomics, we want to have __sync libcalls instead.
20147 if (Subtarget.hasForcedAtomics())
20148 return AtomicExpansionKind::None;
20149
20150 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
20151 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
20152 (Size == 8 || Size == 16))
20153 return AtomicExpansionKind::MaskedIntrinsic;
20154 return AtomicExpansionKind::None;
20155}
20156
20157Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20158 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20159 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20160 unsigned XLen = Subtarget.getXLen();
20161 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
20162 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
20163 if (XLen == 64) {
20164 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
20165 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
20166 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
20167 CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
20168 }
20169 Type *Tys[] = {AlignedAddr->getType()};
20170 Function *MaskedCmpXchg =
20171 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
20172 Value *Result = Builder.CreateCall(
20173 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
20174 if (XLen == 64)
20175 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
20176 return Result;
20177}
20178
20179bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
20180 EVT DataVT) const {
20181 // We have indexed loads for all supported EEW types. Indices are always
20182 // zero extended.
20183 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
20184 isTypeLegal(Extend.getValueType()) &&
20185 isTypeLegal(Extend.getOperand(0).getValueType()) &&
20186 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
20187}
20188
20189bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
20190 EVT VT) const {
20191 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
20192 return false;
20193
20194 switch (FPVT.getSimpleVT().SimpleTy) {
20195 case MVT::f16:
20196 return Subtarget.hasStdExtZfhmin();
20197 case MVT::f32:
20198 return Subtarget.hasStdExtF();
20199 case MVT::f64:
20200 return Subtarget.hasStdExtD();
20201 default:
20202 return false;
20203 }
20204}
20205
20206unsigned RISCVTargetLowering::getJumpTableEncoding() const {
20207 // If we are using the small code model, we can reduce size of jump table
20208 // entry to 4 bytes.
20209 if (Subtarget.is64Bit() && !isPositionIndependent() &&
20210 getTargetMachine().getCodeModel() == CodeModel::Small) {
20211 return MachineJumpTableInfo::EK_Custom32;
20212 }
20213 return TargetLowering::getJumpTableEncoding();
20214}
20215
20216const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
20217 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
20218 unsigned uid, MCContext &Ctx) const {
20219 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
20220 getTargetMachine().getCodeModel() == CodeModel::Small);
20221 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
20222}
20223
20224bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
20225 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
20226 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
20227 // a power of two as well.
20228 // FIXME: This doesn't work for zve32, but that's already broken
20229 // elsewhere for the same reason.
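// For example, VLEN == 128 gives vscale == 128 / RISCV::RVVBitsPerBlock == 2;
// since every supported VLEN is a power of two >= 64, vscale is always a
// power of two as well.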
20230 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
20231 static_assert(RISCV::RVVBitsPerBlock == 64,
20232 "RVVBitsPerBlock changed, audit needed");
20233 return true;
20234}
20235
20236bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
20237 SDValue &Offset,
20238 ISD::MemIndexedMode &AM,
20239 SelectionDAG &DAG) const {
20240 // Target does not support indexed loads.
20241 if (!Subtarget.hasVendorXTHeadMemIdx())
20242 return false;
20243
20244 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
20245 return false;
20246
20247 Base = Op->getOperand(0);
20248 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
20249 int64_t RHSC = RHS->getSExtValue();
20250 if (Op->getOpcode() == ISD::SUB)
20251 RHSC = -(uint64_t)RHSC;
20252
20253 // The constants that can be encoded in the THeadMemIdx instructions
20254 // are of the form (sign_extend(imm5) << imm2).
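// For example (illustrative): an offset of 48 is encodable as 12 << 2 since 12
// fits in a signed 5-bit field and 48 is divisible by 4, whereas 33 is not
// encodable because no shift amount in [0, 3] makes it both divisible by
// 1 << i and small enough for imm5.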
20255 bool isLegalIndexedOffset = false;
20256 for (unsigned i = 0; i < 4; i++)
20257 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
20258 isLegalIndexedOffset = true;
20259 break;
20260 }
20261
20262 if (!isLegalIndexedOffset)
20263 return false;
20264
20265 Offset = Op->getOperand(1);
20266 return true;
20267 }
20268
20269 return false;
20270}
20271
20272bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
20273 SDValue &Offset,
20274 ISD::MemIndexedMode &AM,
20275 SelectionDAG &DAG) const {
20276 EVT VT;
20277 SDValue Ptr;
20278 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20279 VT = LD->getMemoryVT();
20280 Ptr = LD->getBasePtr();
20281 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20282 VT = ST->getMemoryVT();
20283 Ptr = ST->getBasePtr();
20284 } else
20285 return false;
20286
20287 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
20288 return false;
20289
20290 AM = ISD::PRE_INC;
20291 return true;
20292}
20293
20294bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
20295 SDValue &Base,
20296 SDValue &Offset,
20297 ISD::MemIndexedMode &AM,
20298 SelectionDAG &DAG) const {
20299 EVT VT;
20300 SDValue Ptr;
20301 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
20302 VT = LD->getMemoryVT();
20303 Ptr = LD->getBasePtr();
20304 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
20305 VT = ST->getMemoryVT();
20306 Ptr = ST->getBasePtr();
20307 } else
20308 return false;
20309
20310 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
20311 return false;
20312 // Post-indexing updates the base, so it's not a valid transform
20313 // if that's not the same as the load's pointer.
20314 if (Ptr != Base)
20315 return false;
20316
20317 AM = ISD::POST_INC;
20318 return true;
20319}
20320
20321bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
20322 EVT VT) const {
20323 EVT SVT = VT.getScalarType();
20324
20325 if (!SVT.isSimple())
20326 return false;
20327
20328 switch (SVT.getSimpleVT().SimpleTy) {
20329 case MVT::f16:
20330 return VT.isVector() ? Subtarget.hasVInstructionsF16()
20331 : Subtarget.hasStdExtZfhOrZhinx();
20332 case MVT::f32:
20333 return Subtarget.hasStdExtFOrZfinx();
20334 case MVT::f64:
20335 return Subtarget.hasStdExtDOrZdinx();
20336 default:
20337 break;
20338 }
20339
20340 return false;
20341}
20342
20343ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
20344  // Zacas will use amocas.w which does not require extension.
20345 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
20346}
20347
20348Register RISCVTargetLowering::getExceptionPointerRegister(
20349    const Constant *PersonalityFn) const {
20350 return RISCV::X10;
20351}
20352
20353Register RISCVTargetLowering::getExceptionSelectorRegister(
20354    const Constant *PersonalityFn) const {
20355 return RISCV::X11;
20356}
20357
20358bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
20359  // Return false to suppress the unnecessary extensions if the LibCall
20360  // argument or return value is a float narrower than XLEN on a soft FP ABI.
20361 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
20362 Type.getSizeInBits() < Subtarget.getXLen()))
20363 return false;
20364
20365 return true;
20366}
20367
20368bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
20369  if (Subtarget.is64Bit() && Type == MVT::i32)
20370 return true;
20371
20372 return IsSigned;
20373}
20374
20375bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
20376                                                 SDValue C) const {
20377 // Check integral scalar types.
20378 const bool HasExtMOrZmmul =
20379 Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
20380 if (!VT.isScalarInteger())
20381 return false;
20382
20383  // Omit the optimization if the subtarget has the M extension and the data
20384  // size exceeds XLen.
20385 if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
20386 return false;
20387
20388 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
20389 // Break the MUL to a SLLI and an ADD/SUB.
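    // Illustrative example (not from the original source): x * 9 can become
    // (x << 3) + x and x * 7 can become (x << 3) - x, because 9 - 1 and
    // 7 + 1 are powers of two.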
20390 const APInt &Imm = ConstNode->getAPIntValue();
20391 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
20392 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
20393 return true;
20394
20395 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
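    // Illustrative example (not from the original source): 8194 is not a
    // simm12, but 8194 - 2 is a power of two, so x * 8194 can be formed as
    // SH1ADD(x, SLLI(x, 13)), i.e. (x << 1) + (x << 13).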
20396 if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
20397 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
20398 (Imm - 8).isPowerOf2()))
20399 return true;
20400
20401 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
20402 // a pair of LUI/ADDI.
20403 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
20404 ConstNode->hasOneUse()) {
20405 APInt ImmS = Imm.ashr(Imm.countr_zero());
20406 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
20407 (1 - ImmS).isPowerOf2())
20408 return true;
20409 }
20410 }
20411
20412 return false;
20413}
20414
20415bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
20416                                                      SDValue ConstNode) const {
20417 // Let the DAGCombiner decide for vectors.
20418 EVT VT = AddNode.getValueType();
20419 if (VT.isVector())
20420 return true;
20421
20422 // Let the DAGCombiner decide for larger types.
20423 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
20424 return true;
20425
20426 // It is worse if c1 is simm12 while c1*c2 is not.
20427 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
20428 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
20429 const APInt &C1 = C1Node->getAPIntValue();
20430 const APInt &C2 = C2Node->getAPIntValue();
20431 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
20432 return false;
20433
20434 // Default to true and let the DAGCombiner decide.
20435 return true;
20436}
20437
20438bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
20439    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
20440 unsigned *Fast) const {
20441 if (!VT.isVector()) {
20442 if (Fast)
20443 *Fast = Subtarget.hasFastUnalignedAccess();
20444 return Subtarget.hasFastUnalignedAccess();
20445 }
20446
20447 // All vector implementations must support element alignment
20448 EVT ElemVT = VT.getVectorElementType();
20449 if (Alignment >= ElemVT.getStoreSize()) {
20450 if (Fast)
20451 *Fast = 1;
20452 return true;
20453 }
20454
20455 // Note: We lower an unmasked unaligned vector access to an equally sized
20456 // e8 element type access. Given this, we effectively support all unmasked
20457  // misaligned accesses. TODO: Work through the codegen implications of
20458  // allowing such accesses to be formed, and of considering them fast.
20459 if (Fast)
20460 *Fast = Subtarget.hasFastUnalignedAccess();
20461 return Subtarget.hasFastUnalignedAccess();
20462}
20463
20464
20465EVT RISCVTargetLowering::getOptimalMemOpType(const MemOp &Op,
20466                                             const AttributeList &FuncAttributes) const {
20467 if (!Subtarget.hasVInstructions())
20468 return MVT::Other;
20469
20470 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
20471 return MVT::Other;
20472
20473 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
20474 // has an expansion threshold, and we want the number of hardware memory
20475 // operations to correspond roughly to that threshold. LMUL>1 operations
20476 // are typically expanded linearly internally, and thus correspond to more
20477 // than one actual memory operation. Note that store merging and load
20478 // combining will typically form larger LMUL operations from the LMUL1
20479 // operations emitted here, and that's okay because combining isn't
20480 // introducing new memory operations; it's just merging existing ones.
20481 const unsigned MinVLenInBytes = Subtarget.getRealMinVLen()/8;
20482 if (Op.size() < MinVLenInBytes)
20483 // TODO: Figure out short memops. For the moment, do the default thing
20484 // which ends up using scalar sequences.
20485 return MVT::Other;
20486
20487 // Prefer i8 for non-zero memset as it allows us to avoid materializing
20488 // a large scalar constant and instead use vmv.v.x/i to do the
20489 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
20490 // maximize the chance we can encode the size in the vsetvli.
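  // Illustrative example (assuming VLEN=128 and ELEN=64, not from the
  // original source): a sufficiently aligned 32-byte memcpy returns v2i64
  // here, while a non-zero 32-byte memset returns v16i8.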
20491 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
20492 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
20493
20494 // Do we have sufficient alignment for our preferred VT? If not, revert
20495 // to largest size allowed by our alignment criteria.
20496 if (PreferredVT != MVT::i8 && !Subtarget.hasFastUnalignedAccess()) {
20497 Align RequiredAlign(PreferredVT.getStoreSize());
20498 if (Op.isFixedDstAlign())
20499 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
20500 if (Op.isMemcpy())
20501 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
20502 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
20503 }
20504 return MVT::getVectorVT(PreferredVT, MinVLenInBytes/PreferredVT.getStoreSize());
20505}
20506
20507bool RISCVTargetLowering::splitValueIntoRegisterParts(
20508    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
20509 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
20510 bool IsABIRegCopy = CC.has_value();
20511 EVT ValueVT = Val.getValueType();
20512 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20513 PartVT == MVT::f32) {
20514 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
20515 // nan, and cast to f32.
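    // Illustrative example (not from the original source): the f16 value 1.0
    // (bit pattern 0x3C00) is passed as the f32 bit pattern 0xFFFF3C00.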
20516 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
20517 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
20518 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
20519 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
20520 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
20521 Parts[0] = Val;
20522 return true;
20523 }
20524
20525 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20526 LLVMContext &Context = *DAG.getContext();
20527 EVT ValueEltVT = ValueVT.getVectorElementType();
20528 EVT PartEltVT = PartVT.getVectorElementType();
20529 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20530 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20531 if (PartVTBitSize % ValueVTBitSize == 0) {
20532 assert(PartVTBitSize >= ValueVTBitSize);
20533 // If the element types are different, bitcast to the same element type of
20534 // PartVT first.
20535      // For example, to copy a <vscale x 1 x i8> value into
20536      // <vscale x 4 x i16>, we first widen it to <vscale x 8 x i8> via
20537      // INSERT_SUBVECTOR and then bitcast the result to
20538      // <vscale x 4 x i16>.
20539 if (ValueEltVT != PartEltVT) {
20540 if (PartVTBitSize > ValueVTBitSize) {
20541 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20542 assert(Count != 0 && "The number of element should not be zero.");
20543 EVT SameEltTypeVT =
20544 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20545 Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
20546 DAG.getUNDEF(SameEltTypeVT), Val,
20547 DAG.getVectorIdxConstant(0, DL));
20548 }
20549 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
20550 } else {
20551 Val =
20552 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
20553 Val, DAG.getVectorIdxConstant(0, DL));
20554 }
20555 Parts[0] = Val;
20556 return true;
20557 }
20558 }
20559 return false;
20560}
20561
20562SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
20563    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
20564 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
20565 bool IsABIRegCopy = CC.has_value();
20566 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
20567 PartVT == MVT::f32) {
20568 SDValue Val = Parts[0];
20569
20570 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
20571 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
20572 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
20573 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
20574 return Val;
20575 }
20576
20577 if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
20578 LLVMContext &Context = *DAG.getContext();
20579 SDValue Val = Parts[0];
20580 EVT ValueEltVT = ValueVT.getVectorElementType();
20581 EVT PartEltVT = PartVT.getVectorElementType();
20582 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
20583 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
20584 if (PartVTBitSize % ValueVTBitSize == 0) {
20585 assert(PartVTBitSize >= ValueVTBitSize);
20586 EVT SameEltTypeVT = ValueVT;
20587 // If the element types are different, convert it to the same element type
20588 // of PartVT.
20589      // For example, to copy a <vscale x 1 x i8> value out of a
20590      // <vscale x 4 x i16>, we first bitcast the <vscale x 4 x i16> to
20591      // <vscale x 8 x i8> and then extract the <vscale x 1 x i8>
20592      // subvector.
20593 if (ValueEltVT != PartEltVT) {
20594 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
20595 assert(Count != 0 && "The number of element should not be zero.");
20596 SameEltTypeVT =
20597 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
20598 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
20599 }
20600 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
20601 DAG.getVectorIdxConstant(0, DL));
20602 return Val;
20603 }
20604 }
20605 return SDValue();
20606}
20607
20608bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
20609  // When aggressively optimizing for code size, we prefer to use a div
20610 // instruction, as it is usually smaller than the alternative sequence.
20611 // TODO: Add vector division?
20612 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
20613 return OptSize && !VT.isVector();
20614}
20615
20616bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
20617  // Scalarizing zero_ext and sign_ext can prevent them from matching a
20618  // widening instruction in some situations.
20619 unsigned Opc = N->getOpcode();
20620 if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
20621 return false;
20622 return true;
20623}
20624
20625static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
20626 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
20627 Function *ThreadPointerFunc =
20628 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
20629 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
20630 IRB.CreateCall(ThreadPointerFunc), Offset);
20631}
20632
20633Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
20634  // Fuchsia provides a fixed TLS slot for the stack cookie.
20635 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
20636 if (Subtarget.isTargetFuchsia())
20637 return useTpOffset(IRB, -0x10);
20638
20639  return TargetLowering::getIRStackGuard(IRB);
20640}
20641
20642bool RISCVTargetLowering::isLegalInterleavedAccessType(
20643    VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
20644 const DataLayout &DL) const {
20645 EVT VT = getValueType(DL, VTy);
20646 // Don't lower vlseg/vsseg for vector types that can't be split.
20647 if (!isTypeLegal(VT))
20648 return false;
20649
20650  if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
20651      !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
20652 Alignment))
20653 return false;
20654
20655 MVT ContainerVT = VT.getSimpleVT();
20656
20657 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
20658 if (!Subtarget.useRVVForFixedLengthVectors())
20659 return false;
20660 // Sometimes the interleaved access pass picks up splats as interleaves of
20661 // one element. Don't lower these.
20662 if (FVTy->getNumElements() < 2)
20663 return false;
20664
20665    ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
20666  }
20667
20668 // Need to make sure that EMUL * NFIELDS ≤ 8
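  // Illustrative example (not from the original source): a factor-4 segment
  // access of an LMUL=2 type is accepted (4 * 2 == 8), while a factor-8
  // access of the same type is rejected.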
20669 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
20670 if (Fractional)
20671 return true;
20672 return Factor * LMUL <= 8;
20673}
20674
20675bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
20676                                                  Align Alignment) const {
20677 if (!Subtarget.hasVInstructions())
20678 return false;
20679
20680 // Only support fixed vectors if we know the minimum vector size.
20681 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
20682 return false;
20683
20684 EVT ScalarType = DataType.getScalarType();
20685 if (!isLegalElementTypeForRVV(ScalarType))
20686 return false;
20687
20688 if (!Subtarget.hasFastUnalignedAccess() &&
20689 Alignment < ScalarType.getStoreSize())
20690 return false;
20691
20692 return true;
20693}
20694
20695static const Intrinsic::ID FixedVlsegIntrIds[] = {
20696    Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
20697 Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
20698 Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
20699 Intrinsic::riscv_seg8_load};
20700
20701/// Lower an interleaved load into a vlsegN intrinsic.
20702///
20703/// E.g. Lower an interleaved load (Factor = 2):
20704/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
20705/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
20706/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
20707///
20708/// Into:
20709/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
20710/// %ptr, i64 4)
20711/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
20712/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
20713bool RISCVTargetLowering::lowerInterleavedLoad(
20714    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
20715    ArrayRef<unsigned> Indices, unsigned Factor) const {
20716 IRBuilder<> Builder(LI);
20717
20718 auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
20719 if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
20720                                    LI->getPointerAddressSpace(),
20721                                    LI->getModule()->getDataLayout()))
20722 return false;
20723
20724 auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
20725
20726 Function *VlsegNFunc =
20727      Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
20728                                {VTy, LI->getPointerOperandType(), XLenTy});
20729
20730 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
20731
20732 CallInst *VlsegN =
20733 Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
20734
20735 for (unsigned i = 0; i < Shuffles.size(); i++) {
20736 Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
20737 Shuffles[i]->replaceAllUsesWith(SubVec);
20738 }
20739
20740 return true;
20741}
20742
20743static const Intrinsic::ID FixedVssegIntrIds[] = {
20744    Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
20745 Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
20746 Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
20747 Intrinsic::riscv_seg8_store};
20748
20749/// Lower an interleaved store into a vssegN intrinsic.
20750///
20751/// E.g. Lower an interleaved store (Factor = 3):
20752/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
20753/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
20754/// store <12 x i32> %i.vec, <12 x i32>* %ptr
20755///
20756/// Into:
20757/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
20758/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
20759/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
20760/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
20761/// %ptr, i32 4)
20762///
20763/// Note that the new shufflevectors will be removed and we'll only generate one
20764/// vsseg3 instruction in CodeGen.
20765bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
20766                                                ShuffleVectorInst *SVI,
20767 unsigned Factor) const {
20768 IRBuilder<> Builder(SI);
20769 auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
20770 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
20771 auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
20772 ShuffleVTy->getNumElements() / Factor);
20773 if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
20774 SI->getPointerAddressSpace(),
20775 SI->getModule()->getDataLayout()))
20776 return false;
20777
20778 auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
20779
20780 Function *VssegNFunc =
20781 Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
20782 {VTy, SI->getPointerOperandType(), XLenTy});
20783
20784 auto Mask = SVI->getShuffleMask();
20785  SmallVector<Value *, 10> Ops;
20786
20787 for (unsigned i = 0; i < Factor; i++) {
20788 Value *Shuffle = Builder.CreateShuffleVector(
20789 SVI->getOperand(0), SVI->getOperand(1),
20790 createSequentialMask(Mask[i], VTy->getNumElements(), 0));
20791 Ops.push_back(Shuffle);
20792 }
20793 // This VL should be OK (should be executable in one vsseg instruction,
20794 // potentially under larger LMULs) because we checked that the fixed vector
20795 // type fits in isLegalInterleavedAccessType
20796 Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
20797 Ops.append({SI->getPointerOperand(), VL});
20798
20799 Builder.CreateCall(VssegNFunc, Ops);
20800
20801 return true;
20802}
20803
20804bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
20805                                                           LoadInst *LI) const {
20806 assert(LI->isSimple());
20807 IRBuilder<> Builder(LI);
20808
20809 // Only deinterleave2 supported at present.
20810 if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
20811 return false;
20812
20813 unsigned Factor = 2;
20814
20815 VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
20816 VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
20817
20818 if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
20819                                    LI->getPointerAddressSpace(),
20820                                    LI->getModule()->getDataLayout()))
20821 return false;
20822
20823 Function *VlsegNFunc;
20824 Value *VL;
20825 Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
20826  SmallVector<Value *, 10> Ops;
20827
20828 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
20829 VlsegNFunc = Intrinsic::getDeclaration(
20830 LI->getModule(), FixedVlsegIntrIds[Factor - 2],
20831 {ResVTy, LI->getPointerOperandType(), XLenTy});
20832 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
20833 } else {
20834 static const Intrinsic::ID IntrIds[] = {
20835 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
20836 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
20837 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
20838 Intrinsic::riscv_vlseg8};
20839
20840 VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
20841 {ResVTy, XLenTy});
20842 VL = Constant::getAllOnesValue(XLenTy);
20843 Ops.append(Factor, PoisonValue::get(ResVTy));
20844 }
20845
20846 Ops.append({LI->getPointerOperand(), VL});
20847
20848 Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
20849 DI->replaceAllUsesWith(Vlseg);
20850
20851 return true;
20852}
20853
20854bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
20855                                                          StoreInst *SI) const {
20856 assert(SI->isSimple());
20857 IRBuilder<> Builder(SI);
20858
20859 // Only interleave2 supported at present.
20860 if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
20861 return false;
20862
20863 unsigned Factor = 2;
20864
20865 VectorType *VTy = cast<VectorType>(II->getType());
20866 VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
20867
20868 if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
20869 SI->getPointerAddressSpace(),
20870 SI->getModule()->getDataLayout()))
20871 return false;
20872
20873 Function *VssegNFunc;
20874 Value *VL;
20875 Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
20876
20877 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
20878 VssegNFunc = Intrinsic::getDeclaration(
20879 SI->getModule(), FixedVssegIntrIds[Factor - 2],
20880 {InVTy, SI->getPointerOperandType(), XLenTy});
20881 VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
20882 } else {
20883 static const Intrinsic::ID IntrIds[] = {
20884 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
20885 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
20886 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
20887 Intrinsic::riscv_vsseg8};
20888
20889 VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
20890 {InVTy, XLenTy});
20891 VL = Constant::getAllOnesValue(XLenTy);
20892 }
20893
20894 Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
20895 SI->getPointerOperand(), VL});
20896
20897 return true;
20898}
20899
20900MachineInstr *
20901RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
20902                                   MachineBasicBlock::iterator &MBBI,
20903                                   const TargetInstrInfo *TII) const {
20904 assert(MBBI->isCall() && MBBI->getCFIType() &&
20905 "Invalid call instruction for a KCFI check");
20906 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
20907 MBBI->getOpcode()));
20908
20909 MachineOperand &Target = MBBI->getOperand(0);
20910 Target.setIsRenamable(false);
20911
20912 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
20913 .addReg(Target.getReg())
20914 .addImm(MBBI->getCFIType())
20915 .getInstr();
20916}
20917
20918#define GET_REGISTER_MATCHER
20919#include "RISCVGenAsmMatcher.inc"
20920
20921Register
20922RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
20923                                       const MachineFunction &MF) const {
20924  Register Reg = MatchRegisterAltName(RegName);
20925  if (Reg == RISCV::NoRegister)
20926    Reg = MatchRegisterName(RegName);
20927  if (Reg == RISCV::NoRegister)
20928    report_fatal_error(
20929        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
20930 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
20931 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
20932 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
20933 StringRef(RegName) + "\"."));
20934 return Reg;
20935}
20936
20937MachineMemOperand::Flags
20938RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
20939  const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
20940
20941 if (NontemporalInfo == nullptr)
20942    return MachineMemOperand::MONone;
20943
20944  // 1 is the default value and works as __RISCV_NTLH_ALL
20945 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
20946 // 3 -> __RISCV_NTLH_ALL_PRIVATE
20947 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
20948 // 5 -> __RISCV_NTLH_ALL
20949 int NontemporalLevel = 5;
20950 const MDNode *RISCVNontemporalInfo =
20951 I.getMetadata("riscv-nontemporal-domain");
20952 if (RISCVNontemporalInfo != nullptr)
20953 NontemporalLevel =
20954 cast<ConstantInt>(
20955 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
20956 ->getValue())
20957 ->getZExtValue();
20958
20959 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
20960 "RISC-V target doesn't support this non-temporal domain.");
20961
20962 NontemporalLevel -= 2;
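  // Note (added for illustration): after the subtraction, levels 2..5 map to
  // 0b00..0b11, which is then split across the two MONontemporal flag bits
  // set below.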
20963  MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
20964  if (NontemporalLevel & 0b1)
20965 Flags |= MONontemporalBit0;
20966 if (NontemporalLevel & 0b10)
20967 Flags |= MONontemporalBit1;
20968
20969 return Flags;
20970}
20971
20972MachineMemOperand::Flags
20973RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
20974
20975 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
20976  MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
20977  TargetFlags |= (NodeFlags & MONontemporalBit0);
20978 TargetFlags |= (NodeFlags & MONontemporalBit1);
20979 return TargetFlags;
20980}
20981
20982bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
20983    const MemSDNode &NodeX, const MemSDNode &NodeY) const {
20984 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
20985}
20986
20987bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
20988  if (VT.isScalableVector())
20989 return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
20990 if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
20991 return true;
20992 return Subtarget.hasStdExtZbb() &&
20993 (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
20994}
20995
20996unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
20997                                                 ISD::CondCode Cond) const {
20998 return isCtpopFast(VT) ? 0 : 1;
20999}
21000
21001bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
21002
21003 // GISel support is in progress or complete for these opcodes.
21004 unsigned Op = Inst.getOpcode();
21005 if (Op == Instruction::Add || Op == Instruction::Sub ||
21006 Op == Instruction::And || Op == Instruction::Or ||
21007 Op == Instruction::Xor || Op == Instruction::InsertElement ||
21008 Op == Instruction::ShuffleVector || Op == Instruction::Load)
21009 return false;
21010
21011 if (Inst.getType()->isScalableTy())
21012 return true;
21013
21014 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
21015 if (Inst.getOperand(i)->getType()->isScalableTy() &&
21016 !isa<ReturnInst>(&Inst))
21017 return true;
21018
21019 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
21020 if (AI->getAllocatedType()->isScalableTy())
21021 return true;
21022 }
21023
21024 return false;
21025}
21026
21027SDValue
21028RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
21029 SelectionDAG &DAG,
21030 SmallVectorImpl<SDNode *> &Created) const {
21031  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
21032  if (isIntDivCheap(N->getValueType(0), Attr))
21033 return SDValue(N, 0); // Lower SDIV as SDIV
21034
21035 // Only perform this transform if short forward branch opt is supported.
21036 if (!Subtarget.hasShortForwardBranchOpt())
21037 return SDValue();
21038 EVT VT = N->getValueType(0);
21039 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
21040 return SDValue();
21041
21042 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
21043 if (Divisor.sgt(2048) || Divisor.slt(-2048))
21044 return SDValue();
21045 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
21046}
21047
21048bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
21049 EVT VT, const APInt &AndMask) const {
21050 if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
21051 return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
21052  return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
21053}
21054
21055unsigned RISCVTargetLowering::getMinimumJumpTableEntries() const {
21056 return Subtarget.getMinimumJumpTableEntries();
21057}
21058
21059namespace llvm::RISCVVIntrinsicsTable {
21060
21061#define GET_RISCVVIntrinsicsTable_IMPL
21062#include "RISCVGenSearchableTables.inc"
21063
21064} // namespace llvm::RISCVVIntrinsicsTable
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
#define NODE_NAME_CASE(node)
static bool isConstant(const MachineInstr &MI)
amdgpu AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define NL
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define RegName(no)
const MCPhysReg ArgFPR32s[]
const MCPhysReg ArgVRs[]
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
const MCPhysReg ArgFPR64s[]
const MCPhysReg ArgGPRs[]
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
loop Loop Strength Reduction
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static bool IsSelect(MachineInstr &MI)
const char LLVMTargetMachineRef TM
R600 Clause Merge
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef< int > Mask)
Match shuffles that concatenate two vectors, rotate the concatenation, and then extract the original ...
static const Intrinsic::ID FixedVlsegIntrIds[]
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getLMUL1VT(MVT VT)
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2, bool EABI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc, unsigned CVTFOpc)
static const MCPhysReg ArgVRM2s[]
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static void promoteVCIXScalar(const SDValue &Op, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(2))
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static bool isLegalBitRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static std::optional< uint64_t > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MVT getMaskTypeFor(MVT VecVT)
Return the type of the mask type suitable for masking the provided vector type.
static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool hasMaskOp(unsigned Opcode)
Return true if a RISC-V target specified op has a mask operand.
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo, std::optional< unsigned > FirstMaskArgument, CCState &State, const RISCVTargetLowering &TLI)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< unsigned > preAssignMask(const ArgTy &Args)
static SDValue getVLOperand(SDValue Op)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static cl::opt< bool > RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden, cl::desc("Make i32 a legal type for SelectionDAG on RV64."))
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static bool isSelectPseudo(MachineInstr &MI)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangeme...
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static bool hasMergeOp(unsigned Opcode)
Return true if a RISC-V target specified op has a merge operand.
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue &OrigOp, SmallVectorImpl< SDValue > &Operands, SelectionDAG &DAG)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src, bool EvenElts, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue lowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isNonZeroAVL(SDValue AVL)
#define DEBUG_TYPE
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary operation to its equivalent VW or VW_W form.
static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG, unsigned Type)
static ArrayRef< MCPhysReg > getFastCCArgGPRs(const RISCVABI::ABI ABI)
static const MCPhysReg ArgVRM8s[]
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static const MCPhysReg ArgVRM4s[]
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue lowerSADDSAT_SSUBSAT(SDValue Op, SelectionDAG &DAG)
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static const MCPhysReg ArgFPR16s[]
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed...
static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large pro...
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily t...
#define OP_CASE(NODE)
static const Intrinsic::ID FixedVssegIntrIds[]
const SmallVectorImpl< MachineOperand > & Cond
#define ROTR(x, n)
Definition: SHA256.cpp:32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isCommutative(Instruction *I)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
static constexpr int Concat[]
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition: APFloat.h:1185
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition: APFloat.h:977
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
Definition: APInt.cpp:489
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
void clearAllBits()
Set every bit to 0.
Definition: APInt.h:1375
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition: APInt.h:1367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
an instruction to allocate memory on the stack
Definition: Instructions.h:59
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:867
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ Add
*p = old + v
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:778
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:776
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:782
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:780
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
@ Nand
*p = ~(old & v)
Definition: Instructions.h:770
bool isFloatingPointOperation() const
Definition: Instructions.h:922
BinOp getOperation() const
Definition: Instructions.h:845
Value * getValOperand()
Definition: Instructions.h:914
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:887
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:205
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
bool all() const
all - Returns true if all bits are set.
Definition: BitVector.h:175
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:217
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:205
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:154
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:299
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:296
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:200
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:262
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:338
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:350
Argument * getArg(unsigned i) const
Definition: Function.h:831
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:528
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Store the specified register of the given register class to the specified stack frame index.
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
Load the specified register of the given register class from the specified stack frame index.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2001
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1875
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2494
FenceInst * CreateFence(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System, const Twine &Name="")
Definition: IRBuilder.h:1828
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2017
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:520
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:525
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1743
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1338
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:491
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2472
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1848
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2390
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:510
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2644
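A minimal sketch of the IRBuilderBase calls listed above, in the spirit of an emitLeadingFence-style hook; the ordering choice and the helper name are assumptions, not this file's implementation.
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
// Emit an acquire fence at the builder's current insertion point and
// return the newly inserted instruction.
static Instruction *emitAcquireFence(IRBuilderBase &Builder) {
  return Builder.CreateFence(AtomicOrdering::Acquire);
}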
static InstructionCost getInvalid(CostType Val=0)
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:80
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
static constexpr LLT scalable_vector(unsigned MinNumElements, unsigned ScalarSizeInBits)
Get a low-level scalable vector of some number of elements and element width.
Definition: LowLevelType.h:113
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Value * getPointerOperand()
Definition: Instructions.h:280
bool isSimple() const
Definition: Instructions.h:272
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Context object for machine code objects.
Definition: MCContext.h:76
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector type has a power-of-2 number of elements.
uint64_t getScalarStoreSize() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
static auto fp_fixedlen_vector_valuetypes()
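A minimal sketch of the MVT queries above, e.g. for deciding whether a fixed-length vector can be handled directly; the 128-bit threshold and the header path are assumptions (the MachineValueType header has moved between LLVM releases).
#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;
// True for fixed-length integer vectors of at most 128 bits
// (the threshold is illustrative only).
static bool isSmallFixedIntVector(MVT VT) {
  return VT.isFixedLengthVector() && VT.isInteger() &&
         VT.getSizeInBits().getFixedValue() <= 128;
}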
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
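A minimal sketch of CreateStackObject from the MachineFrameInfo entries above, as a target might use it to make a spill slot; the wrapper function is hypothetical.
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;
// Create a statically sized spill slot and return its frame index.
static int createSpillSlot(MachineFunction &MF, uint64_t Size, Align Alignment) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  return MFI.CreateStackObject(Size, Alignment, /*isSpillSlot=*/true);
}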
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
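A minimal sketch of chaining the MachineInstrBuilder operand helpers above; BuildMI and the frame-load shape are standard idioms assumed for the example, not taken from this file.
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
// Emit "Desc Dst, FrameIdx, 0" before I, using the add*() helpers above.
static void emitFrameLoad(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                          const DebugLoc &DL, const MCInstrDesc &Desc,
                          Register Dst, int FrameIdx) {
  BuildMI(MBB, I, DL, Desc, Dst)
      .addFrameIndex(FrameIdx) // stack slot operand
      .addImm(0);              // immediate offset
}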
Representation of each machine instruction.
Definition: MachineInstr.h:69
void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
Definition: MachineInstr.h:386
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
This is an abstract virtual class for memory operations.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:287
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-...
RISCVABI::ABI getTargetABI() const
unsigned getMinimumJumpTableEntries() const
bool hasStdExtCOrZca() const
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtDOrZdinx() const
bool hasStdExtZfhOrZhinx() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool isTargetFuchsia() const
unsigned getDLenFactor() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool isRegisterReservedByUser(Register i) const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
Align getPrefLoopAlignment() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
Align getPrefFunctionAlignment() const
bool hasStdExtZfhminOrZhinxmin() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
bool hasVInstructionsF32() const
unsigned getELen() const
bool hasStdExtFOrZfinx() const
bool isSoftFPABI() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
std::pair< int, bool > getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II, StoreInst *SI) const override
Lower an interleave intrinsic to a target specific store intrinsic.
bool preferScalarizeSplat(SDNode *N) const override
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integ...
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0; (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vlsegN intrinsic.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether a given truncation down into KeptBits would be trun...
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul)
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a cust...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vssegN intrinsic.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II, LoadInst *LI) const override
Lower a deinterleave intrinsic to a target specific load intrinsic.
static RISCVII::VLMUL getLMUL(MVT VT)
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized...
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a strided load/store of the given result type and alignment is legal.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
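A minimal sketch of how generic DAG code queries one of the target hooks listed above through the TargetLowering interface; the helper and its use site are assumptions.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
// Ask the target whether Imm can be folded into an add instruction
// (on RISC-V this corresponds to a 12-bit signed immediate).
static bool canFoldAddImmediate(const SelectionDAG &DAG, int64_t Imm) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  return TLI.isLegalAddImmediate(Imm);
}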
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Return true if the node's value is undefined.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC, bool ConstantFold=true)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
Definition: SelectionDAG.h:387
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns the sum of the base pointer and offset.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts a new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
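A minimal sketch combining a few of the SelectionDAG builders above (getValueType, getConstant, getNode); the particular combine is made up for illustration.
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
// Build V + 1 in V's own value type.
static SDValue addOne(SelectionDAG &DAG, const SDLoc &DL, SDValue V) {
  EVT VT = V.getValueType();
  SDValue One = DAG.getConstant(1, DL, VT);
  return DAG.getNode(ISD::ADD, DL, VT, V, One);
}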
This instruction constructs a fixed permutation of two input vectors.
static bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
ArrayRef< int > getMask() const
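A minimal sketch of the static ShuffleVectorInst mask classifiers above applied to a plain index vector; the mask and element count are example values.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
// Classify a shuffle mask without materializing a shufflevector instruction.
static bool classifyMaskExample() {
  SmallVector<int, 4> Mask = {3, 2, 1, 0}; // reverses a 4-element vector
  return ShuffleVectorInst::isReverseMask(Mask, /*NumSrcElts=*/4);
}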
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
std::string lower() const
Definition: StringRef.cpp:111
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)
Definition: StringSwitch.h:90
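A minimal sketch of StringSwitch as used for string-to-enum style dispatch (for example, constraint or register-name parsing); the mapping below is illustrative only.
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
// Map a constraint-like string to a small integer tag.
static int classifyConstraint(llvm::StringRef Constraint) {
  return llvm::StringSwitch<int>(Constraint)
      .Case("r", 0)
      .Cases("f", "d", 1)
      .Default(-1);
}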
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
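A common guard shape in a DAG combine, sketched with assumed in-scope names TLI and VT and a hypothetical opcode:
  // Only form UMIN when the target can legalize it for this value type.
  if (!TLI.isOperationLegalOrCustom(ISD::UMIN, VT))
    return SDValue();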
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
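Illustrative sketch tying this hook and setTruncStoreAction above together (hypothetical type choices):
  // No f16 truncating stores from f32; expand sign-extending i8 loads into i32.
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i8, Expand);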
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
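A minimal sketch of a soft-float style lowering through this helper, assuming DAG, DL and an SDValue Src are in scope inside a custom lowering routine:
  // Pick the runtime routine for f32 -> i32 and emit the call.
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f32, MVT::i32);
  TargetLowering::MakeLibCallOptions CallOptions;
  auto [Result, OutChain] =
      makeLibCall(DAG, LC, MVT::i32, {Src}, CallOptions, DL, /*Chain=*/SDValue());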
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getStructElementType(unsigned N) const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:249
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:377
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition: TypeSize.h:243
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr bool isZero() const
Definition: TypeSize.h:156
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MIN
Definition: DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
Definition: CallingConv.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1126
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1122
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1339
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1370
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1269
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:559
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1155
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1271
@ STRICT_FCEIL
Definition: ISDOpcodes.h:426
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1272
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1355
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1359
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1228
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1233
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1369
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:913
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1267
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1268
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1400
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:885
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
Definition: ISDOpcodes.h:1188
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1352
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:722
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1221
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1356
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:988
@ STRICT_LROUND
Definition: ISDOpcodes.h:431
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1077
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1270
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1056
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the same...
Definition: ISDOpcodes.h:586
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:646
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1371
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1151
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:430
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1364
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1265
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1211
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:856
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1329
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1248
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1273
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1041
@ STRICT_LRINT
Definition: ISDOpcodes.h:433
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:674
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition: ISDOpcodes.h:591
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ STRICT_FROUND
Definition: ISDOpcodes.h:428
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:449
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1372
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:427
@ STRICT_FROUNDEVEN
Definition: ISDOpcodes.h:429
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:922
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1263
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:984
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1264
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1182
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1208
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition: ISDOpcodes.h:636
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ STRICT_LLRINT
Definition: ISDOpcodes.h:434
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition: ISDOpcodes.h:612
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1262
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ STRICT_LLROUND
Definition: ISDOpcodes.h:432
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:423
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:855
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1360
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1146
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1070
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ STRICT_FRINT
Definition: ISDOpcodes.h:422
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and output vectors having the sa...
Definition: ISDOpcodes.h:580
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
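Illustrative use of the two condition-code helpers above for an integer compare:
  ISD::CondCode CC = ISD::SETLT;
  ISD::CondCode InvCC = ISD::getSetCCInverse(CC, MVT::i64);  // ISD::SETGE
  ISD::CondCode SwapCC = ISD::getSetCCSwappedOperands(CC);   // ISD::SETGT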
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1485
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1485
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1472
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
static const int FIRST_TARGET_STRICTFP_OPCODE
FIRST_TARGET_STRICTFP_OPCODE - Target-specific pre-isel operations which cannot raise FP exceptions s...
Definition: ISDOpcodes.h:1406
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
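For example, the scalar opcode underlying an integer add reduction:
  unsigned ScalarOpc = ISD::getVecReduceBaseOpcode(ISD::VECREDUCE_ADD);  // ISD::ADD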
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1503
bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1568
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1459
@ Bitcast
Perform the operation on a different, but equivalently sized type.
ABI getTargetABI(StringRef ABIName)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:548
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
@ TAIL_UNDISTURBED_MASK_UNDISTURBED
static int getFRMOpNum(const MCInstrDesc &Desc)
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #3 and #4) ...
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
std::pair< unsigned, bool > decodeVLMUL(RISCVII::VLMUL VLMUL)
static RISCVII::VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned encodeSEW(unsigned SEW)
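Illustrative round trip through the SEW helpers above (the enclosing namespace is assumed to be RISCVVType):
  unsigned EncodedSEW = RISCVVType::encodeSEW(32);    // 2, i.e. log2(32) - 3
  unsigned SEW = RISCVVType::decodeVSEW(EncodedSEW);  // 32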
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex)
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, std::optional< unsigned > FirstMaskArgument)
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, std::optional< unsigned > FirstMaskArgument)
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:417
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2415
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:269
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1439
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:319
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:361
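Small self-contained sketch of the MathExtras helpers referenced above:
  #include "llvm/Support/MathExtras.h"
  using namespace llvm;
  static_assert(isPowerOf2_64(64), "64 is a power of two");
  // Log2_64(64) == 6, divideCeil(10, 4) == 3, PowerOf2Ceil(10) == 16.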
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition: STLExtras.h:1937
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:246
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1930
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
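Illustrative use of the 12-bit form (the field width used by RISC-V I-type immediates):
  int64_t Neg = SignExtend64<12>(0xFFF);  // -1
  int64_t Pos = SignExtend64<12>(0x7FF);  // 2047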
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
InstructionCost Cost
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
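For example, a mask selecting lanes 0..3 followed by two undef lanes:
  SmallVector<int, 16> Mask =
      createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/2);
  // Mask == {0, 1, 2, 3, -1, -1}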
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define NC
Definition: regutils.h:42
unsigned StepDenominator
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
uint64_t getScalarStoreSize() const
Definition: ValueTypes.h:387
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition: ValueTypes.h:415
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition: ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
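A minimal sketch built from the EVT helpers above, assuming an LLVMContext &Ctx and a fixed-length vector EVT VT are in scope:
  // Derive an i1 mask type with the same element count as VT.
  EVT MaskVT = EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
  assert(MaskVT.isVector() && MaskVT.getScalarType() == MVT::i1);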
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
Definition: KnownBits.cpp:1018
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:270
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition: KnownBits.h:157
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:292
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
Definition: KnownBits.cpp:976
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:276
static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
Definition: KnownBits.cpp:279
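Illustrative use of the KnownBits helpers above: an 8-bit value whose top five bits are known zero, shifted left by a constant two:
  KnownBits LHS(8);
  LHS.Zero.setHighBits(5);  // the tracked value fits in 3 bits
  KnownBits Res = KnownBits::shl(LHS, KnownBits::makeConstant(APInt(8, 2)));
  // Res.countMaxActiveBits() is now at most 5.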
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
BitVector getReservedRegs(const MachineFunction &MF) const override
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)