1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
43#include "llvm/Support/Debug.h"
49#include <optional>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "riscv-lower"
54
55STATISTIC(NumTailCalls, "Number of tail calls");
56
58 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
59 cl::desc("Give the maximum size (in number of nodes) of the web of "
60 "instructions that we will consider for VW expansion"),
61 cl::init(18));
62
63static cl::opt<bool>
64 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
65 cl::desc("Allow the formation of VW_W operations (e.g., "
66 "VWADD_W) with splat constants"),
67 cl::init(false));
68
70 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
71 cl::desc("Set the minimum number of repetitions of a divisor to allow "
72 "transformation to multiplications by the reciprocal"),
73 cl::init(2));
74
75static cl::opt<int>
77 cl::desc("Give the maximum number of instructions that we will "
78 "use for creating a floating-point immediate value"),
79 cl::init(2));
80
81 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
82 const RISCVSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84
85 RISCVABI::ABI ABI = Subtarget.getTargetABI();
86 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
87
88 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
89 !Subtarget.hasStdExtF()) {
90 errs() << "Hard-float 'f' ABI can't be used for a target that "
91 "doesn't support the F instruction set extension (ignoring "
92 "target-abi)\n";
94 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
95 !Subtarget.hasStdExtD()) {
96 errs() << "Hard-float 'd' ABI can't be used for a target that "
97 "doesn't support the D instruction set extension (ignoring "
98 "target-abi)\n";
100 }
101
102 switch (ABI) {
103 default:
104 report_fatal_error("Don't know how to lower this ABI");
113 break;
114 }
115
116 MVT XLenVT = Subtarget.getXLenVT();
117
118 // Set up the register classes.
119 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
120
121 if (Subtarget.hasStdExtZfhmin())
122 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtZfbfmin())
124 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
125 if (Subtarget.hasStdExtF())
126 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
127 if (Subtarget.hasStdExtD())
128 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
129 if (Subtarget.hasStdExtZhinxmin())
130 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
131 if (Subtarget.hasStdExtZfinx())
132 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
133 if (Subtarget.hasStdExtZdinx()) {
134 if (Subtarget.is64Bit())
135 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
136 else
137 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
138 }
139
140 static const MVT::SimpleValueType BoolVecVTs[] = {
141 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
142 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
143 static const MVT::SimpleValueType IntVecVTs[] = {
144 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
145 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
146 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
147 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
148 MVT::nxv4i64, MVT::nxv8i64};
149 static const MVT::SimpleValueType F16VecVTs[] = {
150 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
151 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
152 static const MVT::SimpleValueType BF16VecVTs[] = {
153 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
154 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
155 static const MVT::SimpleValueType F32VecVTs[] = {
156 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
157 static const MVT::SimpleValueType F64VecVTs[] = {
158 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
159 static const MVT::SimpleValueType VecTupleVTs[] = {
160 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
161 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
162 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
163 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
164 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
165 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
166 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
167 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
168 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
169 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
170 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
171
172 if (Subtarget.hasVInstructions()) {
173 auto addRegClassForRVV = [this](MVT VT) {
174 // Disable the smallest fractional LMUL types if ELEN is less than
175 // RVVBitsPerBlock.
176 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
177 if (VT.getVectorMinNumElements() < MinElts)
178 return;
179
180 unsigned Size = VT.getSizeInBits().getKnownMinValue();
181 const TargetRegisterClass *RC;
182 if (Size <= RISCV::RVVBitsPerBlock)
183 RC = &RISCV::VRRegClass;
184 else if (Size == 2 * RISCV::RVVBitsPerBlock)
185 RC = &RISCV::VRM2RegClass;
186 else if (Size == 4 * RISCV::RVVBitsPerBlock)
187 RC = &RISCV::VRM4RegClass;
188 else if (Size == 8 * RISCV::RVVBitsPerBlock)
189 RC = &RISCV::VRM8RegClass;
190 else
191 llvm_unreachable("Unexpected size");
192
193 addRegisterClass(VT, RC);
194 };
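// Worked example of the mapping above (assuming the usual RVVBitsPerBlock
// of 64): nxv1i8 has a known-min size of 8 bits and lands in VR, nxv8i32
// is 256 bits = 4 * RVVBitsPerBlock and gets VRM4, and nxv16i32 (512 bits)
// gets VRM8. With ELEN == 32 (Zve32*), MinElts is 64 / 32 = 2, so the
// smallest fractional-LMUL type nxv1i8 is skipped entirely.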
195
196 for (MVT VT : BoolVecVTs)
197 addRegClassForRVV(VT);
198 for (MVT VT : IntVecVTs) {
199 if (VT.getVectorElementType() == MVT::i64 &&
200 !Subtarget.hasVInstructionsI64())
201 continue;
202 addRegClassForRVV(VT);
203 }
204
205 if (Subtarget.hasVInstructionsF16Minimal())
206 for (MVT VT : F16VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsBF16Minimal())
210 for (MVT VT : BF16VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.hasVInstructionsF32())
214 for (MVT VT : F32VecVTs)
215 addRegClassForRVV(VT);
216
217 if (Subtarget.hasVInstructionsF64())
218 for (MVT VT : F64VecVTs)
219 addRegClassForRVV(VT);
220
221 if (Subtarget.useRVVForFixedLengthVectors()) {
222 auto addRegClassForFixedVectors = [this](MVT VT) {
223 MVT ContainerVT = getContainerForFixedLengthVector(VT);
224 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
225 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
226 addRegisterClass(VT, TRI.getRegClass(RCID));
227 };
229 if (useRVVForFixedLengthVectorVT(VT))
230 addRegClassForFixedVectors(VT);
231
233 if (useRVVForFixedLengthVectorVT(VT))
234 addRegClassForFixedVectors(VT);
235 }
236
237 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
238 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
239 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
240 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
241 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
242 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
243 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
244 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
245 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
246 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
247 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
248 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
249 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
250 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
251 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
252 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
253 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
254 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
255 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
256 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
257 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
258 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
259 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
260 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
261 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
262 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
263 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
264 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
265 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
266 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
267 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
268 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
269 }
270
271 // Compute derived properties from the register classes.
273
275
277 MVT::i1, Promote);
278 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
280 MVT::i1, Promote);
281
282 // TODO: add all necessary setOperationAction calls.
284
289
294 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
297 }
298
300
303
304 if (!Subtarget.hasVendorXTHeadBb())
306
308
309 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
310 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
311 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
312
313 if (Subtarget.is64Bit()) {
315
318 MVT::i32, Custom);
320 if (!Subtarget.hasStdExtZbb())
323 Custom);
325 }
326 if (!Subtarget.hasStdExtZmmul()) {
328 } else if (Subtarget.is64Bit()) {
331 } else {
333 }
334
335 if (!Subtarget.hasStdExtM()) {
337 Expand);
338 } else if (Subtarget.is64Bit()) {
340 {MVT::i8, MVT::i16, MVT::i32}, Custom);
341 }
342
345 Expand);
346
348 Custom);
349
350 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
351 if (Subtarget.is64Bit())
353 } else if (Subtarget.hasVendorXTHeadBb()) {
354 if (Subtarget.is64Bit())
357 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
359 } else {
361 }
362
363 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
364 // pattern match it directly in isel.
366 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367 Subtarget.hasVendorXTHeadBb())
368 ? Legal
369 : Expand);
370
371 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
373 } else {
374 // Zbkb can use rev8+brev8 to implement bitreverse.
376 Subtarget.hasStdExtZbkb() ? Custom : Expand);
377 }
378
379 if (Subtarget.hasStdExtZbb() ||
380 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
382 Legal);
383 }
384
385 if (Subtarget.hasStdExtZbb() ||
386 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
387 if (Subtarget.is64Bit())
389 } else {
391 }
392
393 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
394 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
395 // We need the custom lowering to make sure that the resulting sequence
396 // for the 32-bit case is efficient on 64-bit targets.
397 if (Subtarget.is64Bit())
399 } else {
401 }
402
403 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
405 } else if (Subtarget.hasShortForwardBranchOpt()) {
406 // We can use PseudoCCSUB to implement ABS.
408 } else if (Subtarget.is64Bit()) {
410 }
411
412 if (!Subtarget.hasVendorXTHeadCondMov())
414
415 static const unsigned FPLegalNodeTypes[] = {
423
424 static const ISD::CondCode FPCCToExpand[] = {
428
429 static const unsigned FPOpToExpand[] = {
431 ISD::FREM};
432
433 static const unsigned FPRndMode[] = {
436
437 static const unsigned ZfhminZfbfminPromoteOps[] = {
447
448 if (Subtarget.hasStdExtZfbfmin()) {
454 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
461 }
462
463 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
464 if (Subtarget.hasStdExtZfhOrZhinx()) {
465 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
466 setOperationAction(FPRndMode, MVT::f16,
467 Subtarget.hasStdExtZfa() ? Legal : Custom);
470 Subtarget.hasStdExtZfa() ? Legal : Custom);
471 if (Subtarget.hasStdExtZfa())
473 } else {
474 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
479 setOperationAction(Op, MVT::f16, Custom);
485 }
486
488
491 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
495
497 ISD::FNEARBYINT, MVT::f16,
498 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
503 MVT::f16, Promote);
504
505 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
506 // complete support for all operations in LegalizeDAG.
511 MVT::f16, Promote);
512
513 // We need to custom promote this.
514 if (Subtarget.is64Bit())
516 }
517
518 if (Subtarget.hasStdExtFOrZfinx()) {
519 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
520 setOperationAction(FPRndMode, MVT::f32,
521 Subtarget.hasStdExtZfa() ? Legal : Custom);
522 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
526 setOperationAction(FPOpToExpand, MVT::f32, Expand);
527 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
528 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
529 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
530 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
534 Subtarget.isSoftFPABI() ? LibCall : Custom);
539
540 if (Subtarget.hasStdExtZfa()) {
544 } else {
546 }
547 }
548
549 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
551
552 if (Subtarget.hasStdExtDOrZdinx()) {
553 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
554
555 if (!Subtarget.is64Bit())
557
558 if (Subtarget.hasStdExtZfa()) {
560 setOperationAction(FPRndMode, MVT::f64, Legal);
563 } else {
564 if (Subtarget.is64Bit())
565 setOperationAction(FPRndMode, MVT::f64, Custom);
566
568 }
569
572 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
576 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
577 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
578 setOperationAction(FPOpToExpand, MVT::f64, Expand);
579 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
580 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
582 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
586 Subtarget.isSoftFPABI() ? LibCall : Custom);
591 }
592
593 if (Subtarget.is64Bit()) {
596 MVT::i32, Custom);
598 }
599
600 if (Subtarget.hasStdExtFOrZfinx()) {
602 Custom);
603
604 // f16/bf16 require custom handling.
606 Custom);
608 Custom);
609
612 }
613
616 XLenVT, Custom);
617
619
620 if (Subtarget.is64Bit())
622
623 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
624 // Unfortunately this can't be determined just from the ISA naming string.
626 Subtarget.is64Bit() ? Legal : Custom);
628 Subtarget.is64Bit() ? Legal : Custom);
629
630 if (Subtarget.is64Bit()) {
633 }
634
637 if (Subtarget.is64Bit())
639
640 if (Subtarget.hasStdExtZicbop()) {
642 }
643
644 if (Subtarget.hasStdExtA()) {
646 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
648 else
650 } else if (Subtarget.hasForcedAtomics()) {
652 } else {
654 }
655
657
659
660 if (getTargetMachine().getTargetTriple().isOSLinux()) {
661 // Custom lowering of llvm.clear_cache.
663 }
664
665 if (Subtarget.hasVInstructions()) {
667
669
670 // RVV intrinsics may have illegal operands.
671 // We also need to custom legalize vmv.x.s.
674 {MVT::i8, MVT::i16}, Custom);
675 if (Subtarget.is64Bit())
677 MVT::i32, Custom);
678 else
680 MVT::i64, Custom);
681
683 MVT::Other, Custom);
684
685 static const unsigned IntegerVPOps[] = {
686 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
687 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
688 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
689 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
690 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
691 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
692 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
693 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
694 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
695 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
696 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
697 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
698 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
699 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
700 ISD::EXPERIMENTAL_VP_SPLAT};
701
702 static const unsigned FloatingPointVPOps[] = {
703 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
704 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
705 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
706 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
707 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
708 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
709 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
710 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
711 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
712 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
713 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
714 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
715 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
716 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
717
718 static const unsigned IntegerVecReduceOps[] = {
722
723 static const unsigned FloatingPointVecReduceOps[] = {
726
727 static const unsigned FloatingPointLibCallOps[] = {
730
731 if (!Subtarget.is64Bit()) {
732 // We must custom-lower certain vXi64 operations on RV32 due to the vector
733 // element type being illegal.
735 MVT::i64, Custom);
736
737 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
738
739 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
740 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
741 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
742 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
743 MVT::i64, Custom);
744 }
745
746 for (MVT VT : BoolVecVTs) {
747 if (!isTypeLegal(VT))
748 continue;
749
751
752 // Mask VTs are custom-expanded into a series of standard nodes
756 VT, Custom);
757
759 Custom);
760
762 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
763 Expand);
764 setOperationAction(ISD::VP_MERGE, VT, Custom);
765
766 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
767 Custom);
768
769 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
770
773 Custom);
774
776 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
777 Custom);
778
779 // RVV has native int->float & float->int conversions where the
780 // element type sizes are within one power-of-two of each other. Any
781 // wider distances between type sizes have to be lowered as sequences
782 // which progressively narrow the gap in stages.
787 VT, Custom);
789 Custom);
790
791 // Expand all extending loads to types larger than this, and truncating
792 // stores from types larger than this.
794 setTruncStoreAction(VT, OtherVT, Expand);
796 OtherVT, Expand);
797 }
798
799 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
800 ISD::VP_TRUNCATE, ISD::VP_SETCC},
801 VT, Custom);
802
805
807
808 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
809 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
810
813 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
814 }
815
816 for (MVT VT : IntVecVTs) {
817 if (!isTypeLegal(VT))
818 continue;
819
822
823 // Vectors implement MULHS/MULHU.
825
826 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
827 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
829
831 Legal);
832
834
835 // Custom-lower extensions and truncations from/to mask types.
837 VT, Custom);
838
839 // RVV has native int->float & float->int conversions where the
840 // element type sizes are within one power-of-two of each other. Any
841 // wider distances between type sizes have to be lowered as sequences
842 // which progressively narrow the gap in stages.
847 VT, Custom);
849 Custom);
853 VT, Legal);
854
855 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
856 // nodes which truncate by one power of two at a time.
859 Custom);
860
861 // Custom-lower insert/extract operations to simplify patterns.
863 Custom);
864
865 // Custom-lower reduction operations to set up the corresponding custom
866 // nodes' operands.
867 setOperationAction(IntegerVecReduceOps, VT, Custom);
868
869 setOperationAction(IntegerVPOps, VT, Custom);
870
872
874 VT, Custom);
875
877 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
878 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
879 VT, Custom);
880
883 VT, Custom);
884
887
889
891 setTruncStoreAction(VT, OtherVT, Expand);
893 OtherVT, Expand);
894 }
895
898
899 // Splice
901
902 if (Subtarget.hasStdExtZvkb()) {
904 setOperationAction(ISD::VP_BSWAP, VT, Custom);
905 } else {
906 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
908 }
909
910 if (Subtarget.hasStdExtZvbb()) {
912 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
913 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
914 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
915 VT, Custom);
916 } else {
917 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
919 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
920 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
921 VT, Expand);
922
923 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
924 // range of f32.
925 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
926 if (isTypeLegal(FloatVT)) {
928 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
929 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
930 VT, Custom);
931 }
932 }
933
935 }
936
937 for (MVT VT : VecTupleVTs) {
938 if (!isTypeLegal(VT))
939 continue;
940
942 }
943
944 // Expand various CCs to best match the RVV ISA, which natively supports UNE
945 // but no other unordered comparisons, and supports all ordered comparisons
946 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
947 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
948 // and we pattern-match those back to the "original", swapping operands once
949 // more. This way we catch both operations and both "vf" and "fv" forms with
950 // fewer patterns.
951 static const ISD::CondCode VFPCCToExpand[] = {
955 };
956
957 // TODO: support more ops.
958 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
966
967 // TODO: support more vp ops.
968 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
969 ISD::VP_FADD,
970 ISD::VP_FSUB,
971 ISD::VP_FMUL,
972 ISD::VP_FDIV,
973 ISD::VP_FMA,
974 ISD::VP_REDUCE_FMIN,
975 ISD::VP_REDUCE_FMAX,
976 ISD::VP_SQRT,
977 ISD::VP_FMINNUM,
978 ISD::VP_FMAXNUM,
979 ISD::VP_FCEIL,
980 ISD::VP_FFLOOR,
981 ISD::VP_FROUND,
982 ISD::VP_FROUNDEVEN,
983 ISD::VP_FROUNDTOZERO,
984 ISD::VP_FRINT,
985 ISD::VP_FNEARBYINT,
986 ISD::VP_SETCC,
987 ISD::VP_FMINIMUM,
988 ISD::VP_FMAXIMUM,
989 ISD::VP_REDUCE_FMINIMUM,
990 ISD::VP_REDUCE_FMAXIMUM};
991
992 // Sets common operation actions on RVV floating-point vector types.
993 const auto SetCommonVFPActions = [&](MVT VT) {
995 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
996 // sizes are within one power-of-two of each other. Therefore conversions
997 // between vXf16 and vXf64 must be lowered as sequences which convert via
998 // vXf32.
1001 // Custom-lower insert/extract operations to simplify patterns.
1003 Custom);
1004 // Expand various condition codes (explained above).
1005 setCondCodeAction(VFPCCToExpand, VT, Expand);
1006
1009
1013 VT, Custom);
1014
1015 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1016
1017 // Expand FP operations that need libcalls.
1018 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1019
1021
1023
1025 VT, Custom);
1026
1028 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1029 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1030 VT, Custom);
1031
1034
1037 VT, Custom);
1038
1041
1043
1044 setOperationAction(FloatingPointVPOps, VT, Custom);
1045
1047 Custom);
1050 VT, Legal);
1055 VT, Custom);
1056
1058 };
1059
1060 // Sets common extload/truncstore actions on RVV floating-point vector
1061 // types.
1062 const auto SetCommonVFPExtLoadTruncStoreActions =
1063 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1064 for (auto SmallVT : SmallerVTs) {
1065 setTruncStoreAction(VT, SmallVT, Expand);
1066 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1067 }
1068 };
1069
1070 // Sets common actions for f16 and bf16 for when there's only
1071 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1072 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1075 Custom);
1076 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1077 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1078 Custom);
1080 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1086 VT, Custom);
1087 MVT EltVT = VT.getVectorElementType();
1088 if (isTypeLegal(EltVT))
1089 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1091 VT, Custom);
1092 else
1093 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1094 EltVT, Custom);
1096 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1097 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1098 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1099 ISD::VP_SCATTER},
1100 VT, Custom);
1101
1105
1106 // Expand FP operations that need libcalls.
1107 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1108
1109 // Custom split nxv32[b]f16 since nxv32f32 is not legal.
1110 if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1111 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1112 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1113 } else {
1114 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1115 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1116 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1117 }
1118 };
1119
1120 if (Subtarget.hasVInstructionsF16()) {
1121 for (MVT VT : F16VecVTs) {
1122 if (!isTypeLegal(VT))
1123 continue;
1124 SetCommonVFPActions(VT);
1125 }
1126 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1127 for (MVT VT : F16VecVTs) {
1128 if (!isTypeLegal(VT))
1129 continue;
1130 SetCommonPromoteToF32Actions(VT);
1131 }
1132 }
1133
1134 if (Subtarget.hasVInstructionsBF16Minimal()) {
1135 for (MVT VT : BF16VecVTs) {
1136 if (!isTypeLegal(VT))
1137 continue;
1138 SetCommonPromoteToF32Actions(VT);
1139 }
1140 }
1141
1142 if (Subtarget.hasVInstructionsF32()) {
1143 for (MVT VT : F32VecVTs) {
1144 if (!isTypeLegal(VT))
1145 continue;
1146 SetCommonVFPActions(VT);
1147 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1148 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1149 }
1150 }
1151
1152 if (Subtarget.hasVInstructionsF64()) {
1153 for (MVT VT : F64VecVTs) {
1154 if (!isTypeLegal(VT))
1155 continue;
1156 SetCommonVFPActions(VT);
1157 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1158 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1159 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1160 }
1161 }
1162
1163 if (Subtarget.useRVVForFixedLengthVectors()) {
1165 if (!useRVVForFixedLengthVectorVT(VT))
1166 continue;
1167
1168 // By default everything must be expanded.
1169 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1172 setTruncStoreAction(VT, OtherVT, Expand);
1174 OtherVT, Expand);
1175 }
1176
1177 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1178 // expansion to a build_vector of 0s.
1180
1181 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1183 Custom);
1184
1187 Custom);
1188
1190 VT, Custom);
1191
1193
1195
1197
1199
1202 Custom);
1203
1205
1208 Custom);
1209
1211 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1212 Custom);
1213
1215 {
1224 },
1225 VT, Custom);
1227 Custom);
1228
1230
1231 // Operations below are different between masks and other vectors.
1232 if (VT.getVectorElementType() == MVT::i1) {
1233 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1234 ISD::OR, ISD::XOR},
1235 VT, Custom);
1236
1237 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1238 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1239 VT, Custom);
1240
1241 setOperationAction(ISD::VP_MERGE, VT, Custom);
1242
1243 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1244 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1245 continue;
1246 }
1247
1248 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1249 // it before type legalization for i64 vectors on RV32. It will then be
1250 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1251 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1252 // improvements first.
1253 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1256 }
1257
1260
1261 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1262 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1263 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1264 ISD::VP_SCATTER},
1265 VT, Custom);
1266
1270 VT, Custom);
1271
1274
1276
1277 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1278 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1280
1284 VT, Custom);
1285
1287
1290
1291 // Custom-lower reduction operations to set up the corresponding custom
1292 // nodes' operands.
1296 VT, Custom);
1297
1298 setOperationAction(IntegerVPOps, VT, Custom);
1299
1300 if (Subtarget.hasStdExtZvkb())
1302
1303 if (Subtarget.hasStdExtZvbb()) {
1306 VT, Custom);
1307 } else {
1308 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1309 // range of f32.
1310 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1311 if (isTypeLegal(FloatVT))
1314 Custom);
1315 }
1316
1318 }
1319
1321 // There are no extending loads or truncating stores.
1322 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1323 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1324 setTruncStoreAction(VT, InnerVT, Expand);
1325 }
1326
1327 if (!useRVVForFixedLengthVectorVT(VT))
1328 continue;
1329
1330 // By default everything must be expanded.
1331 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1333
1334 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1335 // expansion to a build_vector of 0s.
1337
1342 VT, Custom);
1343
1346 VT, Custom);
1347 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1348 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1349 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1350 VT, Custom);
1351
1354 Custom);
1355
1356 if (VT.getVectorElementType() == MVT::f16 &&
1357 !Subtarget.hasVInstructionsF16()) {
1359 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1361 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1362 Custom);
1363 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1364 Custom);
1365 if (Subtarget.hasStdExtZfhmin()) {
1367 } else {
1368 // We need to custom legalize f16 build vectors if Zfhmin isn't
1369 // available.
1371 }
1375 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1376 // Don't promote f16 vector operations to f32 if f32 vector type is
1377 // not legal.
1378 // TODO: could split the f16 vector into two vectors and do promotion.
1379 if (!isTypeLegal(F32VecVT))
1380 continue;
1381 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1382 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1383 continue;
1384 }
1385
1386 if (VT.getVectorElementType() == MVT::bf16) {
1388 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1389 if (Subtarget.hasStdExtZfbfmin()) {
1391 } else {
1392 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1393 // available.
1395 }
1397 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1398 Custom);
1399 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1400 // Don't promote bf16 vector operations to f32 if f32 vector type is
1401 // not legal.
1402 // TODO: could split the bf16 vector into two vectors and do promotion.
1403 if (!isTypeLegal(F32VecVT))
1404 continue;
1405 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1406 // TODO: Promote VP ops to fp32.
1407 continue;
1408 }
1409
1411 Custom);
1412
1417 VT, Custom);
1418
1421 VT, Custom);
1422
1423 setCondCodeAction(VFPCCToExpand, VT, Expand);
1424
1427
1429
1430 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1431
1432 setOperationAction(FloatingPointVPOps, VT, Custom);
1433
1440 VT, Custom);
1441 }
1442
1443 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1444 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1445 if (Subtarget.is64Bit())
1447 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1449 if (Subtarget.hasStdExtZfbfmin())
1451 if (Subtarget.hasStdExtFOrZfinx())
1453 if (Subtarget.hasStdExtDOrZdinx())
1455 }
1456 }
1457
1458 if (Subtarget.hasStdExtA())
1460
1461 if (Subtarget.hasForcedAtomics()) {
1462 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1468 XLenVT, LibCall);
1469 }
1470
1471 if (Subtarget.hasVendorXTHeadMemIdx()) {
1472 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1473 setIndexedLoadAction(im, MVT::i8, Legal);
1474 setIndexedStoreAction(im, MVT::i8, Legal);
1475 setIndexedLoadAction(im, MVT::i16, Legal);
1476 setIndexedStoreAction(im, MVT::i16, Legal);
1477 setIndexedLoadAction(im, MVT::i32, Legal);
1478 setIndexedStoreAction(im, MVT::i32, Legal);
1479
1480 if (Subtarget.is64Bit()) {
1481 setIndexedLoadAction(im, MVT::i64, Legal);
1482 setIndexedStoreAction(im, MVT::i64, Legal);
1483 }
1484 }
1485 }
1486
1487 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1491
1495 }
1496
1497 // Function alignments.
1498 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1499 setMinFunctionAlignment(FunctionAlignment);
1500 // Set preferred alignments.
1503
1509
1510 if (Subtarget.hasStdExtFOrZfinx())
1512
1513 if (Subtarget.hasStdExtZbb())
1515
1516 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1517 Subtarget.hasVInstructions())
1519
1520 if (Subtarget.hasStdExtZbkb())
1522
1523 if (Subtarget.hasStdExtFOrZfinx())
1526 if (Subtarget.hasVInstructions())
1528 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1531 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1534 if (Subtarget.hasVendorXTHeadMemPair())
1536 if (Subtarget.useRVVForFixedLengthVectors())
1538
1539 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1540 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1541
1542 // Disable strict node mutation.
1543 IsStrictFPEnabled = true;
1544 EnableExtLdPromotion = true;
1545
1546 // Let the subtarget decide if a predictable select is more expensive than the
1547 // corresponding branch. This information is used in CGP/SelectOpt to decide
1548 // when to convert selects into branches.
1549 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1550
1551 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1552 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1553
1555 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1556 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1557
1559 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1560 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1561
1562 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1563 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1564}
1565
1566 EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1567 LLVMContext &Context,
1568 EVT VT) const {
1569 if (!VT.isVector())
1570 return getPointerTy(DL);
1571 if (Subtarget.hasVInstructions() &&
1572 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1573 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1575}
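// For example, with V enabled a setcc on nxv4i32 yields nxv4i1, and a
// setcc on a fixed <8 x i16> (when RVV is used for fixed-length vectors)
// yields <8 x i1>; a scalar compare simply gets the pointer-sized integer
// type (XLenVT).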
1576
1577MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1578 return Subtarget.getXLenVT();
1579}
1580
1581// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1582bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1583 unsigned VF,
1584 bool IsScalable) const {
1585 if (!Subtarget.hasVInstructions())
1586 return true;
1587
1588 if (!IsScalable)
1589 return true;
1590
1591 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1592 return true;
1593
1594 // Don't allow VF=1 if those types aren't legal.
1595 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1596 return true;
1597
1598 // VLEN=32 support is incomplete.
1599 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1600 return true;
1601
1602 // The maximum VF is for the smallest element width with LMUL=8.
1603 // VF must be a power of 2.
1604 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1605 return VF > MaxVF || !isPowerOf2_32(VF);
1606}
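// Illustrative numbers (assuming RVVBitsPerBlock == 64): MaxVF is
// (64 / 8) * 8 == 64, i.e. the e8/m8 configuration. A scalable VF of 32
// with an i32 trip count on a V-enabled target is therefore lowered to
// vsetvli, while VF == 48 (not a power of two) or VF == 128 (too large)
// is expanded generically.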
1607
1609 return !Subtarget.hasVInstructions() ||
1610 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1611}
1612
1613 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1614 const CallInst &I,
1615 MachineFunction &MF,
1616 unsigned Intrinsic) const {
1617 auto &DL = I.getDataLayout();
1618
1619 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1620 bool IsUnitStrided, bool UsePtrVal = false) {
1622 // We can't use ptrVal if the intrinsic can access memory before the
1623 // pointer. This means we can't use it for strided or indexed intrinsics.
1624 if (UsePtrVal)
1625 Info.ptrVal = I.getArgOperand(PtrOp);
1626 else
1627 Info.fallbackAddressSpace =
1628 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1629 Type *MemTy;
1630 if (IsStore) {
1631 // Store value is the first operand.
1632 MemTy = I.getArgOperand(0)->getType();
1633 } else {
1634 // Use return type. If it's segment load, return type is a struct.
1635 MemTy = I.getType();
1636 if (MemTy->isStructTy())
1637 MemTy = MemTy->getStructElementType(0);
1638 }
1639 if (!IsUnitStrided)
1640 MemTy = MemTy->getScalarType();
1641
1642 Info.memVT = getValueType(DL, MemTy);
1643 if (MemTy->isTargetExtTy()) {
1644 // RISC-V vector tuple type's alignment type should be its element type.
1645 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1646 MemTy = Type::getIntNTy(
1647 MemTy->getContext(),
1648 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1649 ->getZExtValue());
1650 Info.align = DL.getABITypeAlign(MemTy);
1651 } else {
1652 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1653 }
1655 Info.flags |=
1657 return true;
1658 };
1659
1660 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1662
1664 switch (Intrinsic) {
1665 default:
1666 return false;
1667 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1668 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1669 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1670 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1671 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1672 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1673 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1674 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1675 case Intrinsic::riscv_masked_cmpxchg_i32:
1677 Info.memVT = MVT::i32;
1678 Info.ptrVal = I.getArgOperand(0);
1679 Info.offset = 0;
1680 Info.align = Align(4);
1683 return true;
1684 case Intrinsic::riscv_seg2_load:
1685 case Intrinsic::riscv_seg3_load:
1686 case Intrinsic::riscv_seg4_load:
1687 case Intrinsic::riscv_seg5_load:
1688 case Intrinsic::riscv_seg6_load:
1689 case Intrinsic::riscv_seg7_load:
1690 case Intrinsic::riscv_seg8_load:
1691 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1692 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1693 case Intrinsic::riscv_seg2_store:
1694 case Intrinsic::riscv_seg3_store:
1695 case Intrinsic::riscv_seg4_store:
1696 case Intrinsic::riscv_seg5_store:
1697 case Intrinsic::riscv_seg6_store:
1698 case Intrinsic::riscv_seg7_store:
1699 case Intrinsic::riscv_seg8_store:
1700 // Operands are (vec, ..., vec, ptr, vl)
1701 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1702 /*IsStore*/ true,
1703 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1704 case Intrinsic::riscv_vle:
1705 case Intrinsic::riscv_vle_mask:
1706 case Intrinsic::riscv_vleff:
1707 case Intrinsic::riscv_vleff_mask:
1708 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1709 /*IsStore*/ false,
1710 /*IsUnitStrided*/ true,
1711 /*UsePtrVal*/ true);
1712 case Intrinsic::riscv_vse:
1713 case Intrinsic::riscv_vse_mask:
1714 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1715 /*IsStore*/ true,
1716 /*IsUnitStrided*/ true,
1717 /*UsePtrVal*/ true);
1718 case Intrinsic::riscv_vlse:
1719 case Intrinsic::riscv_vlse_mask:
1720 case Intrinsic::riscv_vloxei:
1721 case Intrinsic::riscv_vloxei_mask:
1722 case Intrinsic::riscv_vluxei:
1723 case Intrinsic::riscv_vluxei_mask:
1724 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1725 /*IsStore*/ false,
1726 /*IsUnitStrided*/ false);
1727 case Intrinsic::riscv_vsse:
1728 case Intrinsic::riscv_vsse_mask:
1729 case Intrinsic::riscv_vsoxei:
1730 case Intrinsic::riscv_vsoxei_mask:
1731 case Intrinsic::riscv_vsuxei:
1732 case Intrinsic::riscv_vsuxei_mask:
1733 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1734 /*IsStore*/ true,
1735 /*IsUnitStrided*/ false);
1736 case Intrinsic::riscv_vlseg2:
1737 case Intrinsic::riscv_vlseg3:
1738 case Intrinsic::riscv_vlseg4:
1739 case Intrinsic::riscv_vlseg5:
1740 case Intrinsic::riscv_vlseg6:
1741 case Intrinsic::riscv_vlseg7:
1742 case Intrinsic::riscv_vlseg8:
1743 case Intrinsic::riscv_vlseg2ff:
1744 case Intrinsic::riscv_vlseg3ff:
1745 case Intrinsic::riscv_vlseg4ff:
1746 case Intrinsic::riscv_vlseg5ff:
1747 case Intrinsic::riscv_vlseg6ff:
1748 case Intrinsic::riscv_vlseg7ff:
1749 case Intrinsic::riscv_vlseg8ff:
1750 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1751 /*IsStore*/ false,
1752 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1753 case Intrinsic::riscv_vlseg2_mask:
1754 case Intrinsic::riscv_vlseg3_mask:
1755 case Intrinsic::riscv_vlseg4_mask:
1756 case Intrinsic::riscv_vlseg5_mask:
1757 case Intrinsic::riscv_vlseg6_mask:
1758 case Intrinsic::riscv_vlseg7_mask:
1759 case Intrinsic::riscv_vlseg8_mask:
1760 case Intrinsic::riscv_vlseg2ff_mask:
1761 case Intrinsic::riscv_vlseg3ff_mask:
1762 case Intrinsic::riscv_vlseg4ff_mask:
1763 case Intrinsic::riscv_vlseg5ff_mask:
1764 case Intrinsic::riscv_vlseg6ff_mask:
1765 case Intrinsic::riscv_vlseg7ff_mask:
1766 case Intrinsic::riscv_vlseg8ff_mask:
1767 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1768 /*IsStore*/ false,
1769 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1770 case Intrinsic::riscv_vlsseg2:
1771 case Intrinsic::riscv_vlsseg3:
1772 case Intrinsic::riscv_vlsseg4:
1773 case Intrinsic::riscv_vlsseg5:
1774 case Intrinsic::riscv_vlsseg6:
1775 case Intrinsic::riscv_vlsseg7:
1776 case Intrinsic::riscv_vlsseg8:
1777 case Intrinsic::riscv_vloxseg2:
1778 case Intrinsic::riscv_vloxseg3:
1779 case Intrinsic::riscv_vloxseg4:
1780 case Intrinsic::riscv_vloxseg5:
1781 case Intrinsic::riscv_vloxseg6:
1782 case Intrinsic::riscv_vloxseg7:
1783 case Intrinsic::riscv_vloxseg8:
1784 case Intrinsic::riscv_vluxseg2:
1785 case Intrinsic::riscv_vluxseg3:
1786 case Intrinsic::riscv_vluxseg4:
1787 case Intrinsic::riscv_vluxseg5:
1788 case Intrinsic::riscv_vluxseg6:
1789 case Intrinsic::riscv_vluxseg7:
1790 case Intrinsic::riscv_vluxseg8:
1791 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1792 /*IsStore*/ false,
1793 /*IsUnitStrided*/ false);
1794 case Intrinsic::riscv_vlsseg2_mask:
1795 case Intrinsic::riscv_vlsseg3_mask:
1796 case Intrinsic::riscv_vlsseg4_mask:
1797 case Intrinsic::riscv_vlsseg5_mask:
1798 case Intrinsic::riscv_vlsseg6_mask:
1799 case Intrinsic::riscv_vlsseg7_mask:
1800 case Intrinsic::riscv_vlsseg8_mask:
1801 case Intrinsic::riscv_vloxseg2_mask:
1802 case Intrinsic::riscv_vloxseg3_mask:
1803 case Intrinsic::riscv_vloxseg4_mask:
1804 case Intrinsic::riscv_vloxseg5_mask:
1805 case Intrinsic::riscv_vloxseg6_mask:
1806 case Intrinsic::riscv_vloxseg7_mask:
1807 case Intrinsic::riscv_vloxseg8_mask:
1808 case Intrinsic::riscv_vluxseg2_mask:
1809 case Intrinsic::riscv_vluxseg3_mask:
1810 case Intrinsic::riscv_vluxseg4_mask:
1811 case Intrinsic::riscv_vluxseg5_mask:
1812 case Intrinsic::riscv_vluxseg6_mask:
1813 case Intrinsic::riscv_vluxseg7_mask:
1814 case Intrinsic::riscv_vluxseg8_mask:
1815 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1816 /*IsStore*/ false,
1817 /*IsUnitStrided*/ false);
1818 case Intrinsic::riscv_vsseg2:
1819 case Intrinsic::riscv_vsseg3:
1820 case Intrinsic::riscv_vsseg4:
1821 case Intrinsic::riscv_vsseg5:
1822 case Intrinsic::riscv_vsseg6:
1823 case Intrinsic::riscv_vsseg7:
1824 case Intrinsic::riscv_vsseg8:
1825 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1826 /*IsStore*/ true,
1827 /*IsUnitStrided*/ false);
1828 case Intrinsic::riscv_vsseg2_mask:
1829 case Intrinsic::riscv_vsseg3_mask:
1830 case Intrinsic::riscv_vsseg4_mask:
1831 case Intrinsic::riscv_vsseg5_mask:
1832 case Intrinsic::riscv_vsseg6_mask:
1833 case Intrinsic::riscv_vsseg7_mask:
1834 case Intrinsic::riscv_vsseg8_mask:
1835 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1836 /*IsStore*/ true,
1837 /*IsUnitStrided*/ false);
1838 case Intrinsic::riscv_vssseg2:
1839 case Intrinsic::riscv_vssseg3:
1840 case Intrinsic::riscv_vssseg4:
1841 case Intrinsic::riscv_vssseg5:
1842 case Intrinsic::riscv_vssseg6:
1843 case Intrinsic::riscv_vssseg7:
1844 case Intrinsic::riscv_vssseg8:
1845 case Intrinsic::riscv_vsoxseg2:
1846 case Intrinsic::riscv_vsoxseg3:
1847 case Intrinsic::riscv_vsoxseg4:
1848 case Intrinsic::riscv_vsoxseg5:
1849 case Intrinsic::riscv_vsoxseg6:
1850 case Intrinsic::riscv_vsoxseg7:
1851 case Intrinsic::riscv_vsoxseg8:
1852 case Intrinsic::riscv_vsuxseg2:
1853 case Intrinsic::riscv_vsuxseg3:
1854 case Intrinsic::riscv_vsuxseg4:
1855 case Intrinsic::riscv_vsuxseg5:
1856 case Intrinsic::riscv_vsuxseg6:
1857 case Intrinsic::riscv_vsuxseg7:
1858 case Intrinsic::riscv_vsuxseg8:
1859 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1860 /*IsStore*/ true,
1861 /*IsUnitStrided*/ false);
1862 case Intrinsic::riscv_vssseg2_mask:
1863 case Intrinsic::riscv_vssseg3_mask:
1864 case Intrinsic::riscv_vssseg4_mask:
1865 case Intrinsic::riscv_vssseg5_mask:
1866 case Intrinsic::riscv_vssseg6_mask:
1867 case Intrinsic::riscv_vssseg7_mask:
1868 case Intrinsic::riscv_vssseg8_mask:
1869 case Intrinsic::riscv_vsoxseg2_mask:
1870 case Intrinsic::riscv_vsoxseg3_mask:
1871 case Intrinsic::riscv_vsoxseg4_mask:
1872 case Intrinsic::riscv_vsoxseg5_mask:
1873 case Intrinsic::riscv_vsoxseg6_mask:
1874 case Intrinsic::riscv_vsoxseg7_mask:
1875 case Intrinsic::riscv_vsoxseg8_mask:
1876 case Intrinsic::riscv_vsuxseg2_mask:
1877 case Intrinsic::riscv_vsuxseg3_mask:
1878 case Intrinsic::riscv_vsuxseg4_mask:
1879 case Intrinsic::riscv_vsuxseg5_mask:
1880 case Intrinsic::riscv_vsuxseg6_mask:
1881 case Intrinsic::riscv_vsuxseg7_mask:
1882 case Intrinsic::riscv_vsuxseg8_mask:
1883 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1884 /*IsStore*/ true,
1885 /*IsUnitStrided*/ false);
1886 }
1887}
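// Sketch of what the helper above computes for a unit-strided load, e.g. a
// call like llvm.riscv.vle.nxv4i32(passthru, ptr %p, i64 %vl) (shown
// schematically): memVT becomes nxv4i32, ptrVal is %p (operand 1), the
// alignment is the 4-byte element size, and MOLoad is set. For the
// strided/indexed forms only the scalar element type is recorded and
// fallbackAddressSpace is used instead of ptrVal, since those intrinsics
// may access memory before %p.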
1888
1889 bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1890 const AddrMode &AM, Type *Ty,
1891 unsigned AS,
1892 Instruction *I) const {
1893 // No global is ever allowed as a base.
1894 if (AM.BaseGV)
1895 return false;
1896
1897 // None of our addressing modes allows a scalable offset
1898 if (AM.ScalableOffset)
1899 return false;
1900
1901 // RVV instructions only support register addressing.
1902 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1903 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1904
1905 // Require a 12-bit signed offset.
1906 if (!isInt<12>(AM.BaseOffs))
1907 return false;
1908
1909 switch (AM.Scale) {
1910 case 0: // "r+i" or just "i", depending on HasBaseReg.
1911 break;
1912 case 1:
1913 if (!AM.HasBaseReg) // allow "r+i".
1914 break;
1915 return false; // disallow "r+r" or "r+r+i".
1916 default:
1917 return false;
1918 }
1919
1920 return true;
1921}
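// Examples of what the rules above accept: "reg" and "reg + 2047" are
// legal (scalar loads/stores take a signed 12-bit displacement, as in
// "lw a0, 2047(a1)"), while "reg + 4096" and "reg + reg" (a scaled index
// plus a base register) are rejected, so addressing-mode driven passes
// keep the add as a separate instruction. For RVV vector types only a
// bare base register is accepted, matching the register-only addressing
// of vle/vse.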
1922
1923 bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
1924 return isInt<12>(Imm);
1925}
1926
1927 bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
1928 return isInt<12>(Imm);
1929}
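// Both immediate hooks above accept exactly the signed 12-bit range
// [-2048, 2047] that ADDI and compare-against-immediate sequences can
// encode directly; e.g. "x + 2047" folds to a single addi, whereas
// "x + 2048" forces the constant to be materialised in a register first.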
1930
1931// On RV32, 64-bit integers are split into their high and low parts and held
1932// in two different registers, so the trunc is free since the low register can
1933// just be used.
1934// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1935// isTruncateFree?
1937 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1938 return false;
1939 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1940 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1941 return (SrcBits == 64 && DestBits == 32);
1942}
1943
1944 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1945 // We consider i64->i32 free on RV64 since we have good selection of W
1946 // instructions that make promoting operations back to i64 free in many cases.
1947 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1948 !DstVT.isInteger())
1949 return false;
1950 unsigned SrcBits = SrcVT.getSizeInBits();
1951 unsigned DestBits = DstVT.getSizeInBits();
1952 return (SrcBits == 64 && DestBits == 32);
1953}
1954
1955 bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const {
1956 EVT SrcVT = Val.getValueType();
1957 // free truncate from vnsrl and vnsra
1958 if (Subtarget.hasVInstructions() &&
1959 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1960 SrcVT.isVector() && VT2.isVector()) {
1961 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1962 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1963 if (SrcBits == DestBits * 2) {
1964 return true;
1965 }
1966 }
1967 return TargetLowering::isTruncateFree(Val, VT2);
1968}
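// For instance, truncating the result of a vector shift such as
//   (trunc nxv4i16 (srl nxv4i32 %x, splat 16))
// is reported free here because it matches a single vnsrl.wi/vnsrl.wx,
// which shifts and narrows 2*SEW -> SEW in one instruction.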
1969
1970 bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
1971 // Zexts are free if they can be combined with a load.
1972 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1973 // poorly with type legalization of compares preferring sext.
1974 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1975 EVT MemVT = LD->getMemoryVT();
1976 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1977 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1978 LD->getExtensionType() == ISD::ZEXTLOAD))
1979 return true;
1980 }
1981
1982 return TargetLowering::isZExtFree(Val, VT2);
1983}
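// Example: zero-extending the result of an i8 or i16 load is free because
// it folds into lbu/lhu, so we report it as such; an i32->i64 zext of a
// load on RV64 is deliberately not reported free (see the comment above),
// since sign extension via lw is what type legalization prefers there.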
1984
1985 bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
1986 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1987}
1988
1990 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1991}
1992
1994 return Subtarget.hasStdExtZbb() ||
1995 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1996}
1997
1999 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2000 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2001}
2002
2003 bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
2004 const Instruction &AndI) const {
2005 // We expect to be able to match a bit extraction instruction if the Zbs
2006 // extension is supported and the mask is a power of two. However, we
2007 // conservatively return false if the mask would fit in an ANDI instruction,
2008 // on the basis that it's possible the sinking+duplication of the AND in
2009 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2010 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2011 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2012 return false;
2013 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2014 if (!Mask)
2015 return false;
2016 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2017}
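// Example of the trade-off above (with Zbs): for "(x & (1 << 20)) != 0"
// the mask does not fit ANDI's signed 12-bit immediate, so sinking the
// AND next to the compare lets isel form a BEXTI-based test and we return
// true; for "(x & 0x40) != 0" ANDI already covers the mask, so duplicating
// the AND would only grow code (ANDI+BNEZ => BEXTI+BNEZ) and we return
// false.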
2018
2020 EVT VT = Y.getValueType();
2021
2022 // FIXME: Support vectors once we have tests.
2023 if (VT.isVector())
2024 return false;
2025
2026 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2027 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2028}
2029
2030 bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2031 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2032 if (Subtarget.hasStdExtZbs())
2033 return X.getValueType().isScalarInteger();
2034 auto *C = dyn_cast<ConstantSDNode>(Y);
2035 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2036 if (Subtarget.hasVendorXTHeadBs())
2037 return C != nullptr;
2038 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2039 return C && C->getAPIntValue().ule(10);
2040}
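// The ule(10) bound above is what keeps the fallback honest: without Zbs
// or XTheadBs a bit test is only cheap while the mask (1 << Y) still fits
// ANDI's signed 12-bit immediate, and bit 10 (mask 0x400) is the largest
// such position; testing bit 11 would need a materialised 0x800 constant.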
2041
2043 EVT VT) const {
2044 // Only enable for rvv.
2045 if (!VT.isVector() || !Subtarget.hasVInstructions())
2046 return false;
2047
2048 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2049 return false;
2050
2051 return true;
2052}
2053
2054 bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
2055 Type *Ty) const {
2056 assert(Ty->isIntegerTy());
2057
2058 unsigned BitSize = Ty->getIntegerBitWidth();
2059 if (BitSize > Subtarget.getXLen())
2060 return false;
2061
2062 // Fast path, assume 32-bit immediates are cheap.
2063 int64_t Val = Imm.getSExtValue();
2064 if (isInt<32>(Val))
2065 return true;
2066
2067 // A constant pool entry may be more aligned than the load we're trying to
2068 // replace. If we don't support unaligned scalar mem, prefer the constant
2069 // pool.
2070 // TODO: Can the caller pass down the alignment?
2071 if (!Subtarget.enableUnalignedScalarMem())
2072 return true;
2073
2074 // Prefer to keep the load if it would require many instructions.
2075 // This uses the same threshold we use for constant pools but doesn't
2076 // check useConstantPoolForLargeInts.
2077 // TODO: Should we keep the load only when we're definitely going to emit a
2078 // constant pool?
2079
2080 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
2081 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2082}
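// Rough intent of the heuristic above: constants that fit in 32 bits are
// always folded to immediates (lui+addi[w] at worst). For wider values,
// if unaligned scalar accesses are unsupported we still fold, because the
// original load may be badly aligned while any constant-pool entry we
// create later is naturally aligned; otherwise we keep the existing load
// whenever rematerialising the value would take more instructions than
// the subtarget's build-int budget (getMaxBuildIntsCost) allows.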
2083
2087 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2088 SelectionDAG &DAG) const {
2089 // One interesting pattern that we'd want to form is 'bit extract':
2090 // ((1 >> Y) & 1) ==/!= 0
2091 // But we also need to be careful not to try to reverse that fold.
2092
2093 // Is this '((1 >> Y) & 1)'?
2094 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2095 return false; // Keep the 'bit extract' pattern.
2096
2097 // Will this be '((1 >> Y) & 1)' after the transform?
2098 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2099 return true; // Do form the 'bit extract' pattern.
2100
2101 // If 'X' is a constant, and we transform, then we will immediately
2102 // try to undo the fold, thus causing endless combine loop.
2103 // So only do the transform if X is not a constant. This matches the default
2104 // implementation of this function.
2105 return !XC;
2106}
2107
2109 unsigned Opc = VecOp.getOpcode();
2110
2111 // Assume target opcodes can't be scalarized.
2112 // TODO - do we have any exceptions?
2113 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2114 return false;
2115
2116 // If the vector op is not supported, try to convert to scalar.
2117 EVT VecVT = VecOp.getValueType();
2118 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2119 return true;
2120
2121 // If the vector op is supported, but the scalar op is not, the transform may
2122 // not be worthwhile.
2123 // Permit converting a vector binary operation to a scalar binary
2124 // operation that is custom lowered with an illegal type.
2125 EVT ScalarVT = VecVT.getScalarType();
2126 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2127 isOperationCustom(Opc, ScalarVT);
2128}
2129
2131 const GlobalAddressSDNode *GA) const {
2132 // In order to maximise the opportunity for common subexpression elimination,
2133 // keep a separate ADD node for the global address offset instead of folding
2134 // it in the global address node. Later peephole optimisations may choose to
2135 // fold it back in when profitable.
2136 return false;
2137}
2138
2139// Returns 0-31 if the fli instruction is available for the type and this is
2140// a legal FP immediate for the type. Returns -1 otherwise.
2141 int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
2142 if (!Subtarget.hasStdExtZfa())
2143 return -1;
2144
2145 bool IsSupportedVT = false;
2146 if (VT == MVT::f16) {
2147 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2148 } else if (VT == MVT::f32) {
2149 IsSupportedVT = true;
2150 } else if (VT == MVT::f64) {
2151 assert(Subtarget.hasStdExtD() && "Expect D extension");
2152 IsSupportedVT = true;
2153 }
2154
2155 if (!IsSupportedVT)
2156 return -1;
2157
2158 return RISCVLoadFPImm::getLoadFPImm(Imm);
2159}
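// For example (assuming Zfa together with the F extension), simple constants
// such as +1.0f, +0.5f or +2.0f are in the fli value table, so the query above
// returns their 5-bit encoding, while a value like 3.14f is not encodable and
// yields -1.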
2160
2162 bool ForCodeSize) const {
2163 bool IsLegalVT = false;
2164 if (VT == MVT::f16)
2165 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2166 else if (VT == MVT::f32)
2167 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2168 else if (VT == MVT::f64)
2169 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2170 else if (VT == MVT::bf16)
2171 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2172
2173 if (!IsLegalVT)
2174 return false;
2175
2176 if (getLegalZfaFPImm(Imm, VT) >= 0)
2177 return true;
2178
2179 // Cannot create a 64 bit floating-point immediate value for rv32.
2180 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2181 // td can handle +0.0 or -0.0 already.
2182 // -0.0 can be created by fmv + fneg.
2183 return Imm.isZero();
2184 }
2185
2186 // Special case: fmv + fneg
2187 if (Imm.isNegZero())
2188 return true;
2189
2190 // Building an integer and then converting requires a fmv at the end of
2191 // the integer sequence. The fmv is not required for Zfinx.
2192 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2193 const int Cost =
2194 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2195 Subtarget.getXLen(), Subtarget);
2196 return Cost <= FPImmCost;
2197}
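// Rough example of the cost check above (assuming RV64, no Zfa, no Zfinx, and
// the default FPImmCost of 2): 2.5f has the bit pattern 0x40200000, which a
// single LUI can materialize, so the total cost is 1 (LUI) + 1 (fmv.w.x) = 2,
// within the threshold, and the immediate is considered legal to materialize.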
2198
2199// TODO: This is very conservative.
2201 unsigned Index) const {
2203 return false;
2204
2205 // Only support extracting a fixed from a fixed vector for now.
2206 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2207 return false;
2208
2209 EVT EltVT = ResVT.getVectorElementType();
2210 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2211
2212 // The smallest type we can slide is i8.
2213 // TODO: We can extract index 0 from a mask vector without a slide.
2214 if (EltVT == MVT::i1)
2215 return false;
2216
2217 unsigned ResElts = ResVT.getVectorNumElements();
2218 unsigned SrcElts = SrcVT.getVectorNumElements();
2219
2220 unsigned MinVLen = Subtarget.getRealMinVLen();
2221 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2222
2223 // If we're extracting only data from the first VLEN bits of the source
2224 // then we can always do this with an m1 vslidedown.vx. Restricting the
2225 // Index ensures we can use a vslidedown.vi.
2226 // TODO: We can generalize this when the exact VLEN is known.
2227 if (Index + ResElts <= MinVLMAX && Index < 31)
2228 return true;
2229
2230 // Conservatively only handle extracting half of a vector.
2231 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2232 // the upper half of a vector until we have more test coverage.
2233 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2234 // a cheap extract. However, this case is important in practice for
2235 // shuffled extracts of longer vectors. How should we resolve this?
2236 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2237}
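// Illustrative example of the rules above (assuming VLEN >= 128): extracting
// v2i32 from v8i32 at index 0 or 2 lies within the first VLEN bits
// (MinVLMAX = 128/32 = 4), so a cheap vslidedown.vi suffices; extracting the
// upper half v4i32 at index 4 is accepted by the half-vector case.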
2238
2241 EVT VT) const {
2242 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2243 // We might still end up using a GPR but that will be decided based on ABI.
2244 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2245 !Subtarget.hasStdExtZfhminOrZhinxmin())
2246 return MVT::f32;
2247
2249
2250 return PartVT;
2251}
2252
2253unsigned
2255 std::optional<MVT> RegisterVT) const {
2256 // Pair inline assembly operand
2257 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2258 *RegisterVT == MVT::Untyped)
2259 return 1;
2260
2261 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2262}
2263
2266 EVT VT) const {
2267 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2268 // We might still end up using a GPR but that will be decided based on ABI.
2269 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2270 !Subtarget.hasStdExtZfhminOrZhinxmin())
2271 return 1;
2272
2274}
2275
2277 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2278 unsigned &NumIntermediates, MVT &RegisterVT) const {
2280 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2281
2282 return NumRegs;
2283}
2284
2285// Changes the condition code and swaps operands if necessary, so the SetCC
2286// operation matches one of the comparisons supported directly by branches
2287// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2288// with 1/-1.
2289static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2290 ISD::CondCode &CC, SelectionDAG &DAG) {
2291 // If this is a single bit test that can't be handled by ANDI, shift the
2292 // bit to be tested to the MSB and perform a signed compare with 0.
2293 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2294 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2295 isa<ConstantSDNode>(LHS.getOperand(1))) {
2296 uint64_t Mask = LHS.getConstantOperandVal(1);
2297 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2298 unsigned ShAmt = 0;
2299 if (isPowerOf2_64(Mask)) {
2301 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2302 } else {
2303 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2304 }
2305
2306 LHS = LHS.getOperand(0);
2307 if (ShAmt != 0)
2308 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2309 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2310 return;
2311 }
2312 }
2313
2314 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2315 int64_t C = RHSC->getSExtValue();
2316 switch (CC) {
2317 default: break;
2318 case ISD::SETGT:
2319 // Convert X > -1 to X >= 0.
2320 if (C == -1) {
2321 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2322 CC = ISD::SETGE;
2323 return;
2324 }
2325 break;
2326 case ISD::SETLT:
2327 // Convert X < 1 to 0 >= X.
2328 if (C == 1) {
2329 RHS = LHS;
2330 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2331 CC = ISD::SETGE;
2332 return;
2333 }
2334 break;
2335 }
2336 }
2337
2338 switch (CC) {
2339 default:
2340 break;
2341 case ISD::SETGT:
2342 case ISD::SETLE:
2343 case ISD::SETUGT:
2344 case ISD::SETULE:
2346 std::swap(LHS, RHS);
2347 break;
2348 }
2349}
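// Worked examples of the rewrites above:
//   (setlt X, 1)  -->  (setge 0, X), i.e. X <= 0, which maps onto bge.
//   (setgt X, Y)  -->  operands swapped so a (setlt Y, X) form can use blt.
//   (seteq (and X, 1<<45), 0) on RV64: the mask does not fit in ANDI's 12-bit
//   immediate, so X is shifted left by 63-45=18 and the tested bit becomes the
//   sign bit, checked with a signed compare against 0.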
2350
2352 if (VT.isRISCVVectorTuple()) {
2353 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2354 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2355 return RISCVII::LMUL_F8;
2356 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2357 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2358 return RISCVII::LMUL_F4;
2359 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2360 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2361 return RISCVII::LMUL_F2;
2362 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2363 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2364 return RISCVII::LMUL_1;
2365 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2366 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2367 return RISCVII::LMUL_2;
2368 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2369 return RISCVII::LMUL_4;
2370 llvm_unreachable("Invalid vector tuple type LMUL.");
2371 }
2372
2373 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2374 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2375 if (VT.getVectorElementType() == MVT::i1)
2376 KnownSize *= 8;
2377
2378 switch (KnownSize) {
2379 default:
2380 llvm_unreachable("Invalid LMUL.");
2381 case 8:
2382 return RISCVII::LMUL_F8;
2383 case 16:
2384 return RISCVII::LMUL_F4;
2385 case 32:
2386 return RISCVII::LMUL_F2;
2387 case 64:
2388 return RISCVII::LMUL_1;
2389 case 128:
2390 return RISCVII::LMUL_2;
2391 case 256:
2392 return RISCVII::LMUL_4;
2393 case 512:
2394 return RISCVII::LMUL_8;
2395 }
2396}
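// Examples for the mapping above (assuming RVVBitsPerBlock == 64): nxv1i8 has
// a known minimum size of 8 bits, giving LMUL_F8; nxv4i32 has 128 bits, giving
// LMUL_2; for masks, nxv8i1 is scaled by 8 to 64 bits and so maps to LMUL_1.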
2397
2399 switch (LMul) {
2400 default:
2401 llvm_unreachable("Invalid LMUL.");
2402 case RISCVII::VLMUL::LMUL_F8:
2403 case RISCVII::VLMUL::LMUL_F4:
2404 case RISCVII::VLMUL::LMUL_F2:
2405 case RISCVII::VLMUL::LMUL_1:
2406 return RISCV::VRRegClassID;
2407 case RISCVII::VLMUL::LMUL_2:
2408 return RISCV::VRM2RegClassID;
2409 case RISCVII::VLMUL::LMUL_4:
2410 return RISCV::VRM4RegClassID;
2411 case RISCVII::VLMUL::LMUL_8:
2412 return RISCV::VRM8RegClassID;
2413 }
2414}
2415
2416unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2417 RISCVII::VLMUL LMUL = getLMUL(VT);
2418 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2419 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2420 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2421 LMUL == RISCVII::VLMUL::LMUL_1) {
2422 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2423 "Unexpected subreg numbering");
2424 return RISCV::sub_vrm1_0 + Index;
2425 }
2426 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2427 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2428 "Unexpected subreg numbering");
2429 return RISCV::sub_vrm2_0 + Index;
2430 }
2431 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2432 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2433 "Unexpected subreg numbering");
2434 return RISCV::sub_vrm4_0 + Index;
2435 }
2436 llvm_unreachable("Invalid vector type.");
2437}
2438
2440 if (VT.isRISCVVectorTuple()) {
2441 unsigned NF = VT.getRISCVVectorTupleNumFields();
2442 unsigned RegsPerField =
2443 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2444 (NF * RISCV::RVVBitsPerBlock));
2445 switch (RegsPerField) {
2446 case 1:
2447 if (NF == 2)
2448 return RISCV::VRN2M1RegClassID;
2449 if (NF == 3)
2450 return RISCV::VRN3M1RegClassID;
2451 if (NF == 4)
2452 return RISCV::VRN4M1RegClassID;
2453 if (NF == 5)
2454 return RISCV::VRN5M1RegClassID;
2455 if (NF == 6)
2456 return RISCV::VRN6M1RegClassID;
2457 if (NF == 7)
2458 return RISCV::VRN7M1RegClassID;
2459 if (NF == 8)
2460 return RISCV::VRN8M1RegClassID;
2461 break;
2462 case 2:
2463 if (NF == 2)
2464 return RISCV::VRN2M2RegClassID;
2465 if (NF == 3)
2466 return RISCV::VRN3M2RegClassID;
2467 if (NF == 4)
2468 return RISCV::VRN4M2RegClassID;
2469 break;
2470 case 4:
2471 assert(NF == 2);
2472 return RISCV::VRN2M4RegClassID;
2473 default:
2474 break;
2475 }
2476 llvm_unreachable("Invalid vector tuple type RegClass.");
2477 }
2478
2479 if (VT.getVectorElementType() == MVT::i1)
2480 return RISCV::VRRegClassID;
2481 return getRegClassIDForLMUL(getLMUL(VT));
2482}
2483
2484// Attempt to decompose a subvector insert/extract between VecVT and
2485// SubVecVT via subregister indices. Returns the subregister index that
2486// can perform the subvector insert/extract with the given element index, as
2487// well as the index corresponding to any leftover subvectors that must be
2488// further inserted/extracted within the register class for SubVecVT.
2489std::pair<unsigned, unsigned>
2491 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2492 const RISCVRegisterInfo *TRI) {
2493 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2494 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2495 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2496 "Register classes not ordered");
2497 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2498 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2499
2500 // If VecVT is a vector tuple type, either it has the same RegClass as
2501 // SubVecVT or SubVecVT is actually a subvector of VecVT.
2502 if (VecVT.isRISCVVectorTuple()) {
2503 if (VecRegClassID == SubRegClassID)
2504 return {RISCV::NoSubRegister, 0};
2505
2506 assert(SubVecVT.isScalableVector() &&
2507 "Only allow scalable vector subvector.");
2508 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2509 "Invalid vector tuple insert/extract for vector and subvector with "
2510 "different LMUL.");
2511 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2512 }
2513
2514 // Try to compose a subregister index that takes us from the incoming
2515 // LMUL>1 register class down to the outgoing one. At each step we halve
2516 // the LMUL:
2517 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2518 // Note that this is not guaranteed to find a subregister index, such as
2519 // when we are extracting from one VR type to another.
2520 unsigned SubRegIdx = RISCV::NoSubRegister;
2521 for (const unsigned RCID :
2522 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2523 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2524 VecVT = VecVT.getHalfNumVectorElementsVT();
2525 bool IsHi =
2526 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2527 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2528 getSubregIndexByMVT(VecVT, IsHi));
2529 if (IsHi)
2530 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2531 }
2532 return {SubRegIdx, InsertExtractIdx};
2533}
2534
2535// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2536// stores for those types.
2537bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2538 return !Subtarget.useRVVForFixedLengthVectors() ||
2539 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2540}
2541
2543 if (!ScalarTy.isSimple())
2544 return false;
2545 switch (ScalarTy.getSimpleVT().SimpleTy) {
2546 case MVT::iPTR:
2547 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2548 case MVT::i8:
2549 case MVT::i16:
2550 case MVT::i32:
2551 return true;
2552 case MVT::i64:
2553 return Subtarget.hasVInstructionsI64();
2554 case MVT::f16:
2555 return Subtarget.hasVInstructionsF16Minimal();
2556 case MVT::bf16:
2557 return Subtarget.hasVInstructionsBF16Minimal();
2558 case MVT::f32:
2559 return Subtarget.hasVInstructionsF32();
2560 case MVT::f64:
2561 return Subtarget.hasVInstructionsF64();
2562 default:
2563 return false;
2564 }
2565}
2566
2567
2568unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2569 return NumRepeatedDivisors;
2570}
2571
2573 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2574 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2575 "Unexpected opcode");
2576 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2577 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2579 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2580 if (!II)
2581 return SDValue();
2582 return Op.getOperand(II->VLOperand + 1 + HasChain);
2583}
2584
2586 const RISCVSubtarget &Subtarget) {
2587 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2588 if (!Subtarget.useRVVForFixedLengthVectors())
2589 return false;
2590
2591 // We only support a set of vector types with a consistent maximum fixed size
2592 // across all supported vector element types to avoid legalization issues.
2593 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2594 // fixed-length vector type we support is 1024 bytes.
2595 if (VT.getFixedSizeInBits() > 1024 * 8)
2596 return false;
2597
2598 unsigned MinVLen = Subtarget.getRealMinVLen();
2599
2600 MVT EltVT = VT.getVectorElementType();
2601
2602 // Don't use RVV for vectors we cannot scalarize if required.
2603 switch (EltVT.SimpleTy) {
2604 // i1 is supported but has different rules.
2605 default:
2606 return false;
2607 case MVT::i1:
2608 // Masks can only use a single register.
2609 if (VT.getVectorNumElements() > MinVLen)
2610 return false;
2611 MinVLen /= 8;
2612 break;
2613 case MVT::i8:
2614 case MVT::i16:
2615 case MVT::i32:
2616 break;
2617 case MVT::i64:
2618 if (!Subtarget.hasVInstructionsI64())
2619 return false;
2620 break;
2621 case MVT::f16:
2622 if (!Subtarget.hasVInstructionsF16Minimal())
2623 return false;
2624 break;
2625 case MVT::bf16:
2626 if (!Subtarget.hasVInstructionsBF16Minimal())
2627 return false;
2628 break;
2629 case MVT::f32:
2630 if (!Subtarget.hasVInstructionsF32())
2631 return false;
2632 break;
2633 case MVT::f64:
2634 if (!Subtarget.hasVInstructionsF64())
2635 return false;
2636 break;
2637 }
2638
2639 // Reject elements larger than ELEN.
2640 if (EltVT.getSizeInBits() > Subtarget.getELen())
2641 return false;
2642
2643 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2644 // Don't use RVV for types that don't fit.
2645 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2646 return false;
2647
2648 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2649 // the base fixed length RVV support in place.
2650 if (!VT.isPow2VectorType())
2651 return false;
2652
2653 return true;
2654}
2655
2656bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2657 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2658}
2659
2660// Return the largest legal scalable vector type that matches VT's element type.
2662 const RISCVSubtarget &Subtarget) {
2663 // This may be called before legal types are setup.
2664 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2665 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2666 "Expected legal fixed length vector!");
2667
2668 unsigned MinVLen = Subtarget.getRealMinVLen();
2669 unsigned MaxELen = Subtarget.getELen();
2670
2671 MVT EltVT = VT.getVectorElementType();
2672 switch (EltVT.SimpleTy) {
2673 default:
2674 llvm_unreachable("unexpected element type for RVV container");
2675 case MVT::i1:
2676 case MVT::i8:
2677 case MVT::i16:
2678 case MVT::i32:
2679 case MVT::i64:
2680 case MVT::bf16:
2681 case MVT::f16:
2682 case MVT::f32:
2683 case MVT::f64: {
2684 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2685 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2686 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2687 unsigned NumElts =
2689 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2690 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2691 return MVT::getScalableVectorVT(EltVT, NumElts);
2692 }
2693 }
2694}
2695
2697 const RISCVSubtarget &Subtarget) {
2699 Subtarget);
2700}
2701
2703 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2704}
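// Illustrative mapping for the container selection above (assuming VLEN is
// exactly 128 and ELEN is 64): a fixed v8i32 (256 bits) gets the container
// nxv4i32, i.e. an LMUL=2 register group, while a small v2i16 gets the
// fractional container nxv1i16.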
2705
2706// Grow V to consume an entire RVV register.
2708 const RISCVSubtarget &Subtarget) {
2709 assert(VT.isScalableVector() &&
2710 "Expected to convert into a scalable vector!");
2711 assert(V.getValueType().isFixedLengthVector() &&
2712 "Expected a fixed length vector operand!");
2713 SDLoc DL(V);
2714 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2715 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2716}
2717
2718// Shrink V so it's just big enough to maintain a VT's worth of data.
2720 const RISCVSubtarget &Subtarget) {
2722 "Expected to convert into a fixed length vector!");
2723 assert(V.getValueType().isScalableVector() &&
2724 "Expected a scalable vector operand!");
2725 SDLoc DL(V);
2726 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2727 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2728}
2729
2730/// Return the mask type suitable for masking the provided vector type.
2731/// This is simply an i1 element type vector of the same
2732/// (possibly scalable) length.
2733static MVT getMaskTypeFor(MVT VecVT) {
2734 assert(VecVT.isVector());
2736 return MVT::getVectorVT(MVT::i1, EC);
2737}
2738
2739/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2740/// vector length VL.
2741static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2742 SelectionDAG &DAG) {
2743 MVT MaskVT = getMaskTypeFor(VecVT);
2744 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2745}
2746
2747static std::pair<SDValue, SDValue>
2749 const RISCVSubtarget &Subtarget) {
2750 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2751 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2752 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2753 return {Mask, VL};
2754}
2755
2756static std::pair<SDValue, SDValue>
2757getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2758 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2759 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2760 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2761 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2762 return {Mask, VL};
2763}
2764
2765// Gets the two common "VL" operands: an all-ones mask and the vector length.
2766// VecVT is a vector type, either fixed-length or scalable. If VecVT is
2767// fixed-length, ContainerVT is the scalable vector type it is contained in;
2768// if VecVT is scalable, then ContainerVT should be the same as VecVT.
2769static std::pair<SDValue, SDValue>
2770getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2771 const RISCVSubtarget &Subtarget) {
2772 if (VecVT.isFixedLengthVector())
2773 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2774 Subtarget);
2775 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2776 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2777}
2778
2780 SelectionDAG &DAG) const {
2781 assert(VecVT.isScalableVector() && "Expected scalable vector");
2782 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2783 VecVT.getVectorElementCount());
2784}
2785
2786std::pair<unsigned, unsigned>
2788 const RISCVSubtarget &Subtarget) {
2789 assert(VecVT.isScalableVector() && "Expected scalable vector");
2790
2791 unsigned EltSize = VecVT.getScalarSizeInBits();
2792 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2793
2794 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2795 unsigned MaxVLMAX =
2796 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2797
2798 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2799 unsigned MinVLMAX =
2800 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2801
2802 return std::make_pair(MinVLMAX, MaxVLMAX);
2803}
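// Example for the bounds above (assuming VLEN is known to lie in [128, 256]):
// for nxv4i32 the element size is 32 and the known minimum vector size is
// 128 bits (LMUL=2), so VLMAX = LMUL * VLEN / SEW, giving MinVLMAX = 8 and
// MaxVLMAX = 16.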
2804
2805// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2806// of either is (currently) supported. This can get us into an infinite loop
2807// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2808// as a ..., etc.
2809// Until either (or both) of these can reliably lower any node, reporting that
2810// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2811// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2812// which is not desirable.
2814 EVT VT, unsigned DefinedValues) const {
2815 return false;
2816}
2817
2819 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2820 // implementation-defined.
2821 if (!VT.isVector())
2823 unsigned DLenFactor = Subtarget.getDLenFactor();
2824 unsigned Cost;
2825 if (VT.isScalableVector()) {
2826 unsigned LMul;
2827 bool Fractional;
2828 std::tie(LMul, Fractional) =
2830 if (Fractional)
2831 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2832 else
2833 Cost = (LMul * DLenFactor);
2834 } else {
2835 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2836 }
2837 return Cost;
2838}
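// Examples of the cost model above: with DLenFactor == 1 (DLEN == VLEN) an
// LMUL=4 type costs about 4 and any fractional LMUL costs 1; with
// DLenFactor == 2 (DLEN == VLEN/2) an LMUL=1 type costs about 2 while an MF2
// type still costs 1.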
2839
2840
2841/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2842/// is generally quadratic in the number of vregs implied by LMUL. Note that
2843/// the operands (index and possibly mask) are handled separately.
2845 return getLMULCost(VT) * getLMULCost(VT);
2846}
2847
2848/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2849/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2850/// or may track the vrgather.vv cost. It is implementation-dependent.
2852 return getLMULCost(VT);
2853}
2854
2855/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2856/// for the type VT. (This does not cover the vslide1up or vslide1down
2857/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2858/// or may track the vrgather.vv cost. It is implementation-dependent.
2860 return getLMULCost(VT);
2861}
2862
2863/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2864/// for the type VT. (This does not cover the vslide1up or vslide1down
2865/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2866/// or may track the vrgather.vv cost. It is implementation-dependent.
2868 return getLMULCost(VT);
2869}
2870
2872 const RISCVSubtarget &Subtarget) {
2873 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2874 // bf16 conversions are always promoted to f32.
2875 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2876 Op.getValueType() == MVT::bf16) {
2877 bool IsStrict = Op->isStrictFPOpcode();
2878
2879 SDLoc DL(Op);
2880 if (IsStrict) {
2881 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2882 {Op.getOperand(0), Op.getOperand(1)});
2883 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2884 {Op.getValueType(), MVT::Other},
2885 {Val.getValue(1), Val.getValue(0),
2886 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2887 }
2888 return DAG.getNode(
2889 ISD::FP_ROUND, DL, Op.getValueType(),
2890 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2891 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2892 }
2893
2894 // Other operations are legal.
2895 return Op;
2896}
2897
2899 const RISCVSubtarget &Subtarget) {
2900 // RISC-V FP-to-int conversions saturate to the destination register size, but
2901 // don't produce 0 for nan. We can use a conversion instruction and fix the
2902 // nan case with a compare and a select.
2903 SDValue Src = Op.getOperand(0);
2904
2905 MVT DstVT = Op.getSimpleValueType();
2906 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2907
2908 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2909
2910 if (!DstVT.isVector()) {
2911 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2912 // the result.
2913 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2914 Src.getValueType() == MVT::bf16) {
2915 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2916 }
2917
2918 unsigned Opc;
2919 if (SatVT == DstVT)
2920 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2921 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2923 else
2924 return SDValue();
2925 // FIXME: Support other SatVTs by clamping before or after the conversion.
2926
2927 SDLoc DL(Op);
2928 SDValue FpToInt = DAG.getNode(
2929 Opc, DL, DstVT, Src,
2931
2932 if (Opc == RISCVISD::FCVT_WU_RV64)
2933 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2934
2935 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2936 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2938 }
2939
2940 // Vectors.
2941
2942 MVT DstEltVT = DstVT.getVectorElementType();
2943 MVT SrcVT = Src.getSimpleValueType();
2944 MVT SrcEltVT = SrcVT.getVectorElementType();
2945 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2946 unsigned DstEltSize = DstEltVT.getSizeInBits();
2947
2948 // Only handle saturating to the destination type.
2949 if (SatVT != DstEltVT)
2950 return SDValue();
2951
2952 MVT DstContainerVT = DstVT;
2953 MVT SrcContainerVT = SrcVT;
2954 if (DstVT.isFixedLengthVector()) {
2955 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2956 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2957 assert(DstContainerVT.getVectorElementCount() ==
2958 SrcContainerVT.getVectorElementCount() &&
2959 "Expected same element count");
2960 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2961 }
2962
2963 SDLoc DL(Op);
2964
2965 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2966
2967 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2968 {Src, Src, DAG.getCondCode(ISD::SETNE),
2969 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2970
2971 // Need to widen by more than 1 step, promote the FP type, then do a widening
2972 // convert.
2973 if (DstEltSize > (2 * SrcEltSize)) {
2974 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2975 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2976 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2977 }
2978
2979 MVT CvtContainerVT = DstContainerVT;
2980 MVT CvtEltVT = DstEltVT;
2981 if (SrcEltSize > (2 * DstEltSize)) {
2982 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2983 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2984 }
2985
2986 unsigned RVVOpc =
2988 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2989
2990 while (CvtContainerVT != DstContainerVT) {
2991 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2992 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2993 // Rounding mode here is arbitrary since we aren't shifting out any bits.
2994 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
2996 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
2997 }
2998
2999 SDValue SplatZero = DAG.getNode(
3000 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3001 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3002 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3003 Res, DAG.getUNDEF(DstContainerVT), VL);
3004
3005 if (DstVT.isFixedLengthVector())
3006 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3007
3008 return Res;
3009}
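// Scalar example of the lowering above: @llvm.fptosi.sat.i32.f32(%x) becomes
// an FCVT_W-style node with static RTZ rounding plus a select that returns 0
// when %x is NaN (detected by comparing %x with itself). The vector path uses
// the same idea with a SETCC_VL mask and a VMERGE_VL of splatted zero.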
3010
3012 const RISCVSubtarget &Subtarget) {
3013 bool IsStrict = Op->isStrictFPOpcode();
3014 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3015
3016 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3017 // bf16 conversions are always promoted to f32.
3018 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3019 SrcVal.getValueType() == MVT::bf16) {
3020 SDLoc DL(Op);
3021 if (IsStrict) {
3022 SDValue Ext =
3023 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3024 {Op.getOperand(0), SrcVal});
3025 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3026 {Ext.getValue(1), Ext.getValue(0)});
3027 }
3028 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3029 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3030 }
3031
3032 // Other operations are legal.
3033 return Op;
3034}
3035
3037 switch (Opc) {
3038 case ISD::FROUNDEVEN:
3040 case ISD::VP_FROUNDEVEN:
3041 return RISCVFPRndMode::RNE;
3042 case ISD::FTRUNC:
3043 case ISD::STRICT_FTRUNC:
3044 case ISD::VP_FROUNDTOZERO:
3045 return RISCVFPRndMode::RTZ;
3046 case ISD::FFLOOR:
3047 case ISD::STRICT_FFLOOR:
3048 case ISD::VP_FFLOOR:
3049 return RISCVFPRndMode::RDN;
3050 case ISD::FCEIL:
3051 case ISD::STRICT_FCEIL:
3052 case ISD::VP_FCEIL:
3053 return RISCVFPRndMode::RUP;
3054 case ISD::FROUND:
3055 case ISD::STRICT_FROUND:
3056 case ISD::VP_FROUND:
3057 return RISCVFPRndMode::RMM;
3058 case ISD::FRINT:
3059 case ISD::VP_FRINT:
3060 return RISCVFPRndMode::DYN;
3061 }
3062
3064}
3065
3066// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3067// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3068// the integer domain and back. Taking care to avoid converting values that are
3069// nan or already correct.
3070static SDValue
3072 const RISCVSubtarget &Subtarget) {
3073 MVT VT = Op.getSimpleValueType();
3074 assert(VT.isVector() && "Unexpected type");
3075
3076 SDLoc DL(Op);
3077
3078 SDValue Src = Op.getOperand(0);
3079
3080 MVT ContainerVT = VT;
3081 if (VT.isFixedLengthVector()) {
3082 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3083 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3084 }
3085
3086 SDValue Mask, VL;
3087 if (Op->isVPOpcode()) {
3088 Mask = Op.getOperand(1);
3089 if (VT.isFixedLengthVector())
3090 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3091 Subtarget);
3092 VL = Op.getOperand(2);
3093 } else {
3094 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3095 }
3096
3097 // Freeze the source since we are increasing the number of uses.
3098 Src = DAG.getFreeze(Src);
3099
3100 // We do the conversion on the absolute value and fix the sign at the end.
3101 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3102
3103 // Determine the largest integer that can be represented exactly. This and
3104 // values larger than it don't have any fractional bits so don't need to
3105 // be converted.
3106 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3107 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3108 APFloat MaxVal = APFloat(FltSem);
3109 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3110 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3111 SDValue MaxValNode =
3112 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3113 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3114 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3115
3116 // If abs(Src) was larger than MaxVal or nan, keep it.
3117 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3118 Mask =
3119 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3120 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3121 Mask, Mask, VL});
3122
3123 // Truncate to integer and convert back to FP.
3124 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3125 MVT XLenVT = Subtarget.getXLenVT();
3126 SDValue Truncated;
3127
3128 switch (Op.getOpcode()) {
3129 default:
3130 llvm_unreachable("Unexpected opcode");
3131 case ISD::FRINT:
3132 case ISD::VP_FRINT:
3133 case ISD::FCEIL:
3134 case ISD::VP_FCEIL:
3135 case ISD::FFLOOR:
3136 case ISD::VP_FFLOOR:
3137 case ISD::FROUND:
3138 case ISD::FROUNDEVEN:
3139 case ISD::VP_FROUND:
3140 case ISD::VP_FROUNDEVEN:
3141 case ISD::VP_FROUNDTOZERO: {
3144 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3145 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3146 break;
3147 }
3148 case ISD::FTRUNC:
3149 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3150 Mask, VL);
3151 break;
3152 case ISD::FNEARBYINT:
3153 case ISD::VP_FNEARBYINT:
3154 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3155 Mask, VL);
3156 break;
3157 }
3158
3159 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3160 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3161 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3162 Mask, VL);
3163
3164 // Restore the original sign so that -0.0 is preserved.
3165 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3166 Src, Src, Mask, VL);
3167
3168 if (!VT.isFixedLengthVector())
3169 return Truncated;
3170
3171 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3172}
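// Worked example of the expansion above for ffloor on an f32 vector lane x:
// take |x|, compare it with 2^23 (the smallest f32 magnitude at which every
// representable value is already an integer), and only for lanes below that
// threshold (and not NaN) convert to integer with RDN rounding and back to FP;
// finally copy the original sign so that -0.0 is preserved.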
3173
3174// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3175// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
3176// qNaNs and converting the new source to integer and back to FP.
3177static SDValue
3179 const RISCVSubtarget &Subtarget) {
3180 SDLoc DL(Op);
3181 MVT VT = Op.getSimpleValueType();
3182 SDValue Chain = Op.getOperand(0);
3183 SDValue Src = Op.getOperand(1);
3184
3185 MVT ContainerVT = VT;
3186 if (VT.isFixedLengthVector()) {
3187 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3188 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3189 }
3190
3191 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3192
3193 // Freeze the source since we are increasing the number of uses.
3194 Src = DAG.getFreeze(Src);
3195
3196 // Convert sNaN to qNaN by executing x + x for all unordered elements x in Src.
3197 MVT MaskVT = Mask.getSimpleValueType();
3199 DAG.getVTList(MaskVT, MVT::Other),
3200 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3201 DAG.getUNDEF(MaskVT), Mask, VL});
3202 Chain = Unorder.getValue(1);
3204 DAG.getVTList(ContainerVT, MVT::Other),
3205 {Chain, Src, Src, Src, Unorder, VL});
3206 Chain = Src.getValue(1);
3207
3208 // We do the conversion on the absolute value and fix the sign at the end.
3209 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3210
3211 // Determine the largest integer that can be represented exactly. This and
3212 // values larger than it don't have any fractional bits so don't need to
3213 // be converted.
3214 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3215 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3216 APFloat MaxVal = APFloat(FltSem);
3217 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3218 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3219 SDValue MaxValNode =
3220 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3221 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3222 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3223
3224 // If abs(Src) was larger than MaxVal or nan, keep it.
3225 Mask = DAG.getNode(
3226 RISCVISD::SETCC_VL, DL, MaskVT,
3227 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3228
3229 // Truncate to integer and convert back to FP.
3230 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3231 MVT XLenVT = Subtarget.getXLenVT();
3232 SDValue Truncated;
3233
3234 switch (Op.getOpcode()) {
3235 default:
3236 llvm_unreachable("Unexpected opcode");
3237 case ISD::STRICT_FCEIL:
3238 case ISD::STRICT_FFLOOR:
3239 case ISD::STRICT_FROUND:
3243 Truncated = DAG.getNode(
3244 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3245 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3246 break;
3247 }
3248 case ISD::STRICT_FTRUNC:
3249 Truncated =
3251 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3252 break;
3255 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3256 Mask, VL);
3257 break;
3258 }
3259 Chain = Truncated.getValue(1);
3260
3261 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3262 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3263 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3264 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3265 Truncated, Mask, VL);
3266 Chain = Truncated.getValue(1);
3267 }
3268
3269 // Restore the original sign so that -0.0 is preserved.
3270 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3271 Src, Src, Mask, VL);
3272
3273 if (VT.isFixedLengthVector())
3274 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3275 return DAG.getMergeValues({Truncated, Chain}, DL);
3276}
3277
3278static SDValue
3280 const RISCVSubtarget &Subtarget) {
3281 MVT VT = Op.getSimpleValueType();
3282 if (VT.isVector())
3283 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3284
3285 if (DAG.shouldOptForSize())
3286 return SDValue();
3287
3288 SDLoc DL(Op);
3289 SDValue Src = Op.getOperand(0);
3290
3291 // Create an integer the size of the mantissa with the MSB set. This and all
3292 // values larger than it don't have any fractional bits so don't need to be
3293 // converted.
3294 const fltSemantics &FltSem = VT.getFltSemantics();
3295 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3296 APFloat MaxVal = APFloat(FltSem);
3297 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3298 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3299 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3300
3302 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3303 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3304}
3305
3306// Expand vector LRINT and LLRINT by converting to the integer domain.
3308 const RISCVSubtarget &Subtarget) {
3309 MVT VT = Op.getSimpleValueType();
3310 assert(VT.isVector() && "Unexpected type");
3311
3312 SDLoc DL(Op);
3313 SDValue Src = Op.getOperand(0);
3314 MVT ContainerVT = VT;
3315
3316 if (VT.isFixedLengthVector()) {
3317 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3318 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3319 }
3320
3321 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3322 SDValue Truncated = DAG.getNode(
3323 RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
3325 VL);
3326
3327 if (!VT.isFixedLengthVector())
3328 return Truncated;
3329
3330 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3331}
3332
3333static SDValue
3335 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3336 SDValue Offset, SDValue Mask, SDValue VL,
3338 if (Passthru.isUndef())
3340 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3341 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3342 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3343}
3344
3345static SDValue
3346getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3347 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3348 SDValue VL,
3350 if (Passthru.isUndef())
3352 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3353 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3354 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3355}
3356
3357static MVT getLMUL1VT(MVT VT) {
3359 "Unexpected vector MVT");
3363}
3364
3365struct VIDSequence {
3366 int64_t StepNumerator;
3367 unsigned StepDenominator;
3368 int64_t Addend;
3369};
3370
3371static std::optional<APInt> getExactInteger(const APFloat &APF,
3373 // We will use a SINT_TO_FP to materialize this constant so we should use a
3374 // signed APSInt here.
3375 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3376 // We use an arbitrary rounding mode here. If a floating-point is an exact
3377 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3378 // the rounding mode changes the output value, then it is not an exact
3379 // integer.
3381 bool IsExact;
3382 // If it is out of signed integer range, it will return an invalid operation.
3383 // If it is not an exact integer, IsExact is false.
3384 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3386 !IsExact)
3387 return std::nullopt;
3388 return ValInt.extractBits(BitWidth, 0);
3389}
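// Examples for the helper above: getExactInteger(3.0, 8) yields APInt(8, 3);
// 2.5 is rejected because rounding would change the value, and 300.0 with an
// 8-bit width is rejected as out of signed range, both returning std::nullopt.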
3390
3391// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3392// to the (non-zero) step S and start value X. This can be then lowered as the
3393// RVV sequence (VID * S) + X, for example.
3394// The step S is represented as an integer numerator divided by a positive
3395// denominator. Note that the implementation currently only identifies
3396// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3397// cannot detect 2/3, for example.
3398// Note that this method will also match potentially unappealing index
3399// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3400// determine whether this is worth generating code for.
3401//
3402// EltSizeInBits is the size of the type that the sequence will be calculated
3403// in, i.e. SEW for build_vectors or XLEN for address calculations.
3404static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3405 unsigned EltSizeInBits) {
3406 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3407 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3408 return std::nullopt;
3409 bool IsInteger = Op.getValueType().isInteger();
3410
3411 std::optional<unsigned> SeqStepDenom;
3412 std::optional<APInt> SeqStepNum;
3413 std::optional<APInt> SeqAddend;
3414 std::optional<std::pair<APInt, unsigned>> PrevElt;
3415 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3416
3417 // First extract the ops into a list of constant integer values. This may not
3418 // be possible for floats if they're not all representable as integers.
3420 const unsigned OpSize = Op.getScalarValueSizeInBits();
3421 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3422 if (Elt.isUndef()) {
3423 Elts[Idx] = std::nullopt;
3424 continue;
3425 }
3426 if (IsInteger) {
3427 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3428 } else {
3429 auto ExactInteger =
3430 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3431 if (!ExactInteger)
3432 return std::nullopt;
3433 Elts[Idx] = *ExactInteger;
3434 }
3435 }
3436
3437 for (auto [Idx, Elt] : enumerate(Elts)) {
3438 // Assume undef elements match the sequence; we just have to be careful
3439 // when interpolating across them.
3440 if (!Elt)
3441 continue;
3442
3443 if (PrevElt) {
3444 // Calculate the step since the last non-undef element, and ensure
3445 // it's consistent across the entire sequence.
3446 unsigned IdxDiff = Idx - PrevElt->second;
3447 APInt ValDiff = *Elt - PrevElt->first;
3448
3449 // A zero value difference means that we're somewhere in the middle
3450 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3451 // step change before evaluating the sequence.
3452 if (ValDiff == 0)
3453 continue;
3454
3455 int64_t Remainder = ValDiff.srem(IdxDiff);
3456 // Normalize the step if it's greater than 1.
3457 if (Remainder != ValDiff.getSExtValue()) {
3458 // The difference must cleanly divide the element span.
3459 if (Remainder != 0)
3460 return std::nullopt;
3461 ValDiff = ValDiff.sdiv(IdxDiff);
3462 IdxDiff = 1;
3463 }
3464
3465 if (!SeqStepNum)
3466 SeqStepNum = ValDiff;
3467 else if (ValDiff != SeqStepNum)
3468 return std::nullopt;
3469
3470 if (!SeqStepDenom)
3471 SeqStepDenom = IdxDiff;
3472 else if (IdxDiff != *SeqStepDenom)
3473 return std::nullopt;
3474 }
3475
3476 // Record this non-undef element for later.
3477 if (!PrevElt || PrevElt->first != *Elt)
3478 PrevElt = std::make_pair(*Elt, Idx);
3479 }
3480
3481 // We need to have logged a step for this to count as a legal index sequence.
3482 if (!SeqStepNum || !SeqStepDenom)
3483 return std::nullopt;
3484
3485 // Loop back through the sequence and validate elements we might have skipped
3486 // while waiting for a valid step. While doing this, log any sequence addend.
3487 for (auto [Idx, Elt] : enumerate(Elts)) {
3488 if (!Elt)
3489 continue;
3490 APInt ExpectedVal =
3491 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3492 *SeqStepNum)
3493 .sdiv(*SeqStepDenom);
3494
3495 APInt Addend = *Elt - ExpectedVal;
3496 if (!SeqAddend)
3497 SeqAddend = Addend;
3498 else if (Addend != SeqAddend)
3499 return std::nullopt;
3500 }
3501
3502 assert(SeqAddend && "Must have an addend if we have a step");
3503
3504 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3505 SeqAddend->getSExtValue()};
3506}
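// Examples for the matcher above: <0,2,4,6> matches with {StepNumerator=2,
// StepDenominator=1, Addend=0}; <1,1,2,2> matches with {StepNumerator=1,
// StepDenominator=2, Addend=1}; <0,1,3> does not match because no consistent
// step exists.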
3507
3508// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3509// and lower it as a VRGATHER_VX_VL from the source vector.
3510static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3511 SelectionDAG &DAG,
3512 const RISCVSubtarget &Subtarget) {
3513 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3514 return SDValue();
3515 SDValue Vec = SplatVal.getOperand(0);
3516 // Don't perform this optimization for i1 vectors, or if the element types are
3517 // different.
3518 // FIXME: Support i1 vectors, maybe by promoting to i8?
3519 MVT EltTy = VT.getVectorElementType();
3520 if (EltTy == MVT::i1 ||
3522 return SDValue();
3523 SDValue Idx = SplatVal.getOperand(1);
3524 // The index must be a legal type.
3525 if (Idx.getValueType() != Subtarget.getXLenVT())
3526 return SDValue();
3527
3528 // Check that Index lies within VT
3529 // TODO: Can we check if the Index is constant and known in-bounds?
3531 return SDValue();
3532
3533 MVT ContainerVT = VT;
3534 if (VT.isFixedLengthVector())
3535 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3536
3537 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3538 DAG.getUNDEF(ContainerVT), Vec,
3539 DAG.getVectorIdxConstant(0, DL));
3540
3541 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3542
3543 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3544 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3545
3546 if (!VT.isFixedLengthVector())
3547 return Gather;
3548
3549 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3550}
3551
3552/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3553/// which constitute a large proportion of the elements. In such cases we can
3554/// splat a vector with the dominant element and make up the shortfall with
3555/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3556/// Note that this includes vectors of 2 elements by association. The
3557/// upper-most element is the "dominant" one, allowing us to use a splat to
3558/// "insert" the upper element, and an insert of the lower element at position
3559/// 0, which improves codegen.
3561 const RISCVSubtarget &Subtarget) {
3562 MVT VT = Op.getSimpleValueType();
3563 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3564
3565 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3566
3567 SDLoc DL(Op);
3568 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3569
3570 MVT XLenVT = Subtarget.getXLenVT();
3571 unsigned NumElts = Op.getNumOperands();
3572
3573 SDValue DominantValue;
3574 unsigned MostCommonCount = 0;
3575 DenseMap<SDValue, unsigned> ValueCounts;
3576 unsigned NumUndefElts =
3577 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3578
3579 // Track the number of scalar loads we know we'd be inserting, estimated as
3580 // any non-zero floating-point constant. Other kinds of elements are either
3581 // already in registers or are materialized on demand. The threshold at which
3582 // a vector load is more desirable than several scalar materialization and
3583 // vector-insertion instructions is not known.
3584 unsigned NumScalarLoads = 0;
3585
3586 for (SDValue V : Op->op_values()) {
3587 if (V.isUndef())
3588 continue;
3589
3590 unsigned &Count = ValueCounts[V];
3591 if (0 == Count)
3592 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3593 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3594
3595 // Is this value dominant? In case of a tie, prefer the highest element as
3596 // it's cheaper to insert near the beginning of a vector than it is at the
3597 // end.
3598 if (++Count >= MostCommonCount) {
3599 DominantValue = V;
3600 MostCommonCount = Count;
3601 }
3602 }
3603
3604 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3605 unsigned NumDefElts = NumElts - NumUndefElts;
3606 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3607
3608 // Don't perform this optimization when optimizing for size, since
3609 // materializing elements and inserting them tends to cause code bloat.
3610 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3611 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3612 ((MostCommonCount > DominantValueCountThreshold) ||
3613 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3614 // Start by splatting the most common element.
3615 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3616
3617 DenseSet<SDValue> Processed{DominantValue};
3618
3619 // We can handle an insert into the last element (of a splat) via
3620 // v(f)slide1down. This is slightly better than the vslideup insert
3621 // lowering as it avoids the need for a vector group temporary. It
3622 // is also better than using vmerge.vx as it avoids the need to
3623 // materialize the mask in a vector register.
3624 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3625 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3626 LastOp != DominantValue) {
3627 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3628 auto OpCode =
3630 if (!VT.isFloatingPoint())
3631 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3632 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3633 LastOp, Mask, VL);
3634 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3635 Processed.insert(LastOp);
3636 }
3637
3638 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3639 for (const auto &OpIdx : enumerate(Op->ops())) {
3640 const SDValue &V = OpIdx.value();
3641 if (V.isUndef() || !Processed.insert(V).second)
3642 continue;
3643 if (ValueCounts[V] == 1) {
3644 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3645 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3646 } else {
3647 // Blend in all instances of this value using a VSELECT, using a
3648 // mask where each bit signals whether that element is the one
3649 // we're after.
3651 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3652 return DAG.getConstant(V == V1, DL, XLenVT);
3653 });
3654 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3655 DAG.getBuildVector(SelMaskTy, DL, Ops),
3656 DAG.getSplatBuildVector(VT, DL, V), Vec);
3657 }
3658 }
3659
3660 return Vec;
3661 }
3662
3663 return SDValue();
3664}
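// Example for the lowering above: <7,3,3,3,3,3,3,3> is lowered as a splat of
// the dominant value 3 followed by a single INSERT_VECTOR_ELT of 7 at index 0,
// rather than eight independent element inserts.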
3665
3667 const RISCVSubtarget &Subtarget) {
3668 MVT VT = Op.getSimpleValueType();
3669 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3670
3671 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3672
3673 SDLoc DL(Op);
3674 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3675
3676 MVT XLenVT = Subtarget.getXLenVT();
3677 unsigned NumElts = Op.getNumOperands();
3678
3679 if (VT.getVectorElementType() == MVT::i1) {
3680 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3681 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3682 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3683 }
3684
3685 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3686 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3687 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3688 }
3689
3690 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3691 // scalar integer chunks whose bit-width depends on the number of mask
3692 // bits and XLEN.
3693 // First, determine the most appropriate scalar integer type to use. This
3694 // is at most XLenVT, but may be shrunk to a smaller vector element type
3695 // according to the size of the final vector - use i8 chunks rather than
3696 // XLenVT if we're producing a v8i1. This results in more consistent
3697 // codegen across RV32 and RV64.
3698 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3699 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3700 // If we have to use more than one INSERT_VECTOR_ELT then this
3701 // optimization is likely to increase code size; avoid performing it in
3702 // such a case. We can use a load from a constant pool in this case.
3703 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3704 return SDValue();
3705 // Now we can create our integer vector type. Note that it may be larger
3706 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3707 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3708 MVT IntegerViaVecVT =
3709 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3710 IntegerViaVecElts);
3711
3712 uint64_t Bits = 0;
3713 unsigned BitPos = 0, IntegerEltIdx = 0;
3714 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3715
3716 for (unsigned I = 0; I < NumElts;) {
3717 SDValue V = Op.getOperand(I);
3718 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3719 Bits |= ((uint64_t)BitValue << BitPos);
3720 ++BitPos;
3721 ++I;
3722
3723 // Once we accumulate enough bits to fill our scalar type or process the
3724 // last element, insert into our vector and clear our accumulated data.
3725 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3726 if (NumViaIntegerBits <= 32)
3727 Bits = SignExtend64<32>(Bits);
3728 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3729 Elts[IntegerEltIdx] = Elt;
3730 Bits = 0;
3731 BitPos = 0;
3732 IntegerEltIdx++;
3733 }
3734 }
3735
3736 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3737
3738 if (NumElts < NumViaIntegerBits) {
3739 // If we're producing a smaller vector than our minimum legal integer
3740 // type, bitcast to the equivalent (known-legal) mask type, and extract
3741 // our final mask.
3742 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3743 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3744 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3745 DAG.getConstant(0, DL, XLenVT));
3746 } else {
3747 // Else we must have produced an integer type with the same size as the
3748 // mask type; bitcast for the final result.
3749 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3750 Vec = DAG.getBitcast(VT, Vec);
3751 }
3752
3753 return Vec;
3754 }
3755
3756 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3757 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3759 if (!VT.isFloatingPoint())
3760 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3761 Splat =
3762 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3763 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3764 }
3765
3766 // Try and match index sequences, which we can lower to the vid instruction
3767 // with optional modifications. An all-undef vector is matched by
3768 // getSplatValue, above.
3769 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3770 int64_t StepNumerator = SimpleVID->StepNumerator;
3771 unsigned StepDenominator = SimpleVID->StepDenominator;
3772 int64_t Addend = SimpleVID->Addend;
3773
3774 assert(StepNumerator != 0 && "Invalid step");
3775 bool Negate = false;
3776 int64_t SplatStepVal = StepNumerator;
3777 unsigned StepOpcode = ISD::MUL;
3778 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3779 // anyway as the shift of 63 won't fit in uimm5.
3780 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3781 isPowerOf2_64(std::abs(StepNumerator))) {
3782 Negate = StepNumerator < 0;
3783 StepOpcode = ISD::SHL;
3784 SplatStepVal = Log2_64(std::abs(StepNumerator));
3785 }
3786
3787 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3788 // threshold since it's the immediate value many RVV instructions accept.
3789 // There is no vmul.vi instruction so ensure multiply constant can fit in
3790 // a single addi instruction.
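// For example, the index sequence <0, 2, 4, 6> has StepNumerator = 2 and
// Addend = 0, so it is lowered roughly as:
// vid.v v8
// vsll.vi v8, v8, 1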
3791 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3792 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3793 isPowerOf2_32(StepDenominator) &&
3794 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3795 MVT VIDVT =
3796 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3797 MVT VIDContainerVT =
3798 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3799 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3800 // Convert right out of the scalable type so we can use standard ISD
3801 // nodes for the rest of the computation. If we used scalable types with
3802 // these, we'd lose the fixed-length vector info and generate worse
3803 // vsetvli code.
3804 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3805 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3806 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3807 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3808 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3809 }
3810 if (StepDenominator != 1) {
3811 SDValue SplatStep =
3812 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3813 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3814 }
3815 if (Addend != 0 || Negate) {
3816 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3817 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3818 VID);
3819 }
3820 if (VT.isFloatingPoint()) {
3821 // TODO: Use vfwcvt to reduce register pressure.
3822 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3823 }
3824 return VID;
3825 }
3826 }
3827
3828 // For very small build_vectors, use a single scalar insert of a constant.
3829 // TODO: Base this on constant rematerialization cost, not size.
3830 const unsigned EltBitSize = VT.getScalarSizeInBits();
3831 if (VT.getSizeInBits() <= 32 &&
3832 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3833 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3834 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3835 "Unexpected sequence type");
3836 // If we can use the original VL with the modified element type, this
3837 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3838 // be moved into InsertVSETVLI?
3839 unsigned ViaVecLen =
3840 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3841 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3842
3843 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3844 uint64_t SplatValue = 0;
3845 // Construct the amalgamated value at this larger vector type.
3846 for (const auto &OpIdx : enumerate(Op->op_values())) {
3847 const auto &SeqV = OpIdx.value();
3848 if (!SeqV.isUndef())
3849 SplatValue |=
3850 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3851 }
3852
3853 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3854 // achieve better constant materialization.
3855 // On RV32, we need to sign-extend to use getSignedConstant.
3856 if (ViaIntVT == MVT::i32)
3857 SplatValue = SignExtend64<32>(SplatValue);
3858
3859 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3860 DAG.getUNDEF(ViaVecVT),
3861 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3862 DAG.getVectorIdxConstant(0, DL));
3863 if (ViaVecLen != 1)
3864 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3865 MVT::getVectorVT(ViaIntVT, 1), Vec,
3866 DAG.getConstant(0, DL, XLenVT));
3867 return DAG.getBitcast(VT, Vec);
3868 }
3869
3870
3871 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3872 // when re-interpreted as a vector with a larger element type. For example,
3873 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3874 // could be instead splat as
3875 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3876 // TODO: This optimization could also work on non-constant splats, but it
3877 // would require bit-manipulation instructions to construct the splat value.
3878 SmallVector<SDValue> Sequence;
3879 const auto *BV = cast<BuildVectorSDNode>(Op);
3880 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3881 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3882 BV->getRepeatedSequence(Sequence) &&
3883 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3884 unsigned SeqLen = Sequence.size();
3885 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3886 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3887 ViaIntVT == MVT::i64) &&
3888 "Unexpected sequence type");
3889
3890 // If we can use the original VL with the modified element type, this
3891 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3892 // be moved into InsertVSETVLI?
3893 const unsigned RequiredVL = NumElts / SeqLen;
3894 const unsigned ViaVecLen =
3895 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3896 NumElts : RequiredVL;
3897 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3898
3899 unsigned EltIdx = 0;
3900 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3901 uint64_t SplatValue = 0;
3902 // Construct the amalgamated value which can be splatted as this larger
3903 // vector type.
3904 for (const auto &SeqV : Sequence) {
3905 if (!SeqV.isUndef())
3906 SplatValue |=
3907 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3908 EltIdx++;
3909 }
3910
3911 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3912 // achieve better constant materialization.
3913 // On RV32, we need to sign-extend to use getSignedConstant.
3914 if (ViaIntVT == MVT::i32)
3915 SplatValue = SignExtend64<32>(SplatValue);
3916
3917 // Since we can't introduce illegal i64 types at this stage, we can only
3918 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3919 // way we can use RVV instructions to splat.
3920 assert((ViaIntVT.bitsLE(XLenVT) ||
3921 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3922 "Unexpected bitcast sequence");
3923 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3924 SDValue ViaVL =
3925 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3926 MVT ViaContainerVT =
3927 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3928 SDValue Splat =
3929 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3930 DAG.getUNDEF(ViaContainerVT),
3931 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3932 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3933 if (ViaVecLen != RequiredVL)
3934 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3935 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3936 DAG.getConstant(0, DL, XLenVT));
3937 return DAG.getBitcast(VT, Splat);
3938 }
3939 }
3940
3941 // If the number of signbits allows, see if we can lower as a <N x i8>.
3942 // Our main goal here is to reduce LMUL (and thus work) required to
3943 // build the constant, but we will also narrow if the resulting
3944 // narrow vector is known to materialize cheaply.
3945 // TODO: We really should be costing the smaller vector. There are
3946 // profitable cases this misses.
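// For example, a v4i16 constant <1, -2, 3, -4> has at most 8 significant
// bits per element, so it can be built as a v4i8 build_vector and widened
// back to v4i16 with a vsext.vf2.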
3947 if (EltBitSize > 8 && VT.isInteger() &&
3948 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3949 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3950 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3951 DL, Op->ops());
3952 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3953 Source, DAG, Subtarget);
3954 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3955 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3956 }
3957
3958 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3959 return Res;
3960
3961 // For constant vectors, use generic constant pool lowering. Otherwise,
3962 // we'd have to materialize constants in GPRs just to move them into the
3963 // vector.
3964 return SDValue();
3965}
3966
3967static unsigned getPACKOpcode(unsigned DestBW,
3968 const RISCVSubtarget &Subtarget) {
3969 switch (DestBW) {
3970 default:
3971 llvm_unreachable("Unsupported pack size");
3972 case 16:
3973 return RISCV::PACKH;
3974 case 32:
3975 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3976 case 64:
3977 assert(Subtarget.is64Bit());
3978 return RISCV::PACK;
3979 }
3980}
3981
3982/// Double the element size of the build vector to reduce the number
3983/// of vslide1down in the build vector chain. In the worst case, this
3984/// trades three scalar operations for 1 vector operation. Scalar
3985/// operations are generally lower latency, and for out-of-order cores
3986/// we also benefit from additional parallelism.
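/// For example, a v8i8 build_vector of scalars a..h becomes a v4i16
/// build_vector of (b << 8) | a, (d << 8) | c, (f << 8) | e, (h << 8) | g
/// (formed with packh when Zbkb is available), which is then bitcast back
/// to v8i8.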
3987 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
3988 const RISCVSubtarget &Subtarget) {
3989 SDLoc DL(Op);
3990 MVT VT = Op.getSimpleValueType();
3991 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3992 MVT ElemVT = VT.getVectorElementType();
3993 if (!ElemVT.isInteger())
3994 return SDValue();
3995
3996 // TODO: Relax these architectural restrictions, possibly with costing
3997 // of the actual instructions required.
3998 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
3999 return SDValue();
4000
4001 unsigned NumElts = VT.getVectorNumElements();
4002 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4003 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4004 NumElts % 2 != 0)
4005 return SDValue();
4006
4007 // Produce [B,A] packed into a type twice as wide. Note that all
4008 // scalars are XLenVT, possibly masked (see below).
4009 MVT XLenVT = Subtarget.getXLenVT();
4010 SDValue Mask = DAG.getConstant(
4011 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4012 auto pack = [&](SDValue A, SDValue B) {
4013 // Bias the scheduling of the inserted operations to near the
4014 // definition of the element - this tends to reduce register
4015 // pressure overall.
4016 SDLoc ElemDL(B);
4017 if (Subtarget.hasStdExtZbkb())
4018 // Note that we're relying on the high bits of the result being
4019 // don't care. For PACKW, the result is *sign* extended.
4020 return SDValue(
4021 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4022 ElemDL, XLenVT, A, B),
4023 0);
4024
4025 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4026 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4027 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4028 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4029 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4031 };
4032
4033 SmallVector<SDValue> NewOperands;
4034 NewOperands.reserve(NumElts / 2);
4035 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4036 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4037 assert(NumElts == NewOperands.size() * 2);
4038 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4039 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4040 return DAG.getNode(ISD::BITCAST, DL, VT,
4041 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4042}
4043
4044 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4045 const RISCVSubtarget &Subtarget) {
4046 MVT VT = Op.getSimpleValueType();
4047 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4048
4049 MVT EltVT = VT.getVectorElementType();
4050 MVT XLenVT = Subtarget.getXLenVT();
4051
4052 SDLoc DL(Op);
4053
4054 // Proper support for f16 requires Zvfh. bf16 always requires special
4055 // handling. We need to cast the scalar to integer and create an integer
4056 // build_vector.
4057 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4058 MVT IVT = VT.changeVectorElementType(MVT::i16);
4059 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4060 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4061 SDValue Elem = Op.getOperand(I);
4062 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4063 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4064 // Called by LegalizeDAG, we need to use XLenVT operations since we
4065 // can't create illegal types.
4066 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4067 // Manually constant fold so the integer build_vector can be lowered
4068 // better. Waiting for DAGCombine will be too late.
4069 APInt V =
4070 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4071 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4072 } else {
4073 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4074 }
4075 } else {
4076 // Called by scalar type legalizer, we can use i16.
4077 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4078 }
4079 }
4080 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4081 return DAG.getBitcast(VT, Res);
4082 }
4083
4084 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4085 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4086 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4087
4088 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4089
4090 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4091
4092 if (VT.getVectorElementType() == MVT::i1) {
4093 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4094 // vector type, we have a legal equivalently-sized i8 type, so we can use
4095 // that.
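// For example, v4i1 = build_vector a, b, c, d is lowered as
// (v4i8 build_vector a, b, c, d) & 1 != 0,
// which is selected roughly as a vand.vi followed by a vmsne.vi.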
4096 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4097 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4098
4099 SDValue WideVec;
4100 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4101 // For a splat, perform a scalar truncate before creating the wider
4102 // vector.
4103 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4104 DAG.getConstant(1, DL, Splat.getValueType()));
4105 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4106 } else {
4107 SmallVector<SDValue, 8> Ops(Op->op_values());
4108 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4109 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4110 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4111 }
4112
4113 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4114 }
4115
4116 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4117 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4118 return Gather;
4119 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4120 : RISCVISD::VMV_V_X_VL;
4121 if (!VT.isFloatingPoint())
4122 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4123 Splat =
4124 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4125 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4126 }
4127
4128 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4129 return Res;
4130
4131 // If we're compiling for an exact VLEN value, we can split our work per
4132 // register in the register group.
4133 if (const auto VLen = Subtarget.getRealVLen();
4134 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4135 MVT ElemVT = VT.getVectorElementType();
4136 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4137 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4138 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4139 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4140 assert(M1VT == getLMUL1VT(M1VT));
4141
4142 // The following semantically builds up a fixed length concat_vector
4143 // of the component build_vectors. We eagerly lower to scalable and
4144 // insert_subvector here to avoid DAG combining it back to a large
4145 // build_vector.
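// For example, with VLEN=128 a v8i64 build_vector (LMUL=4) is assembled
// from four v2i64 build_vectors, each inserted into its own register of
// the destination register group.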
4146 SmallVector<SDValue> BuildVectorOps(Op->ops());
4147 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4148 SDValue Vec = DAG.getUNDEF(ContainerVT);
4149 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4150 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4151 SDValue SubBV =
4152 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4153 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4154 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4155 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4156 DAG.getVectorIdxConstant(InsertIdx, DL));
4157 }
4158 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4159 }
4160
4161 // If we're about to resort to vslide1down (or stack usage), pack our
4162 // elements into the widest scalar type we can. This will force a VL/VTYPE
4163 // toggle, but reduces the critical path, the number of vslide1down ops
4164 // required, and possibly enables scalar folds of the values.
4165 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4166 return Res;
4167
4168 // For m1 vectors, if we have non-undef values in both halves of our vector,
4169 // split the vector into low and high halves, build them separately, then
4170 // use a vselect to combine them. For long vectors, this cuts the critical
4171 // path of the vslide1down sequence in half, and gives us an opportunity
4172 // to special case each half independently. Note that we don't change the
4173 // length of the sub-vectors here, so if both fall back to the generic
4174 // vslide1down path, we should be able to fold the vselect into the final
4175 // vslidedown (for the undef tail) for the first half w/ masking.
4176 unsigned NumElts = VT.getVectorNumElements();
4177 unsigned NumUndefElts =
4178 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4179 unsigned NumDefElts = NumElts - NumUndefElts;
4180 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4181 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4182 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4183 SmallVector<SDValue> MaskVals;
4184 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4185 SubVecAOps.reserve(NumElts);
4186 SubVecBOps.reserve(NumElts);
4187 for (unsigned i = 0; i < NumElts; i++) {
4188 SDValue Elem = Op->getOperand(i);
4189 if (i < NumElts / 2) {
4190 SubVecAOps.push_back(Elem);
4191 SubVecBOps.push_back(UndefElem);
4192 } else {
4193 SubVecAOps.push_back(UndefElem);
4194 SubVecBOps.push_back(Elem);
4195 }
4196 bool SelectMaskVal = (i < NumElts / 2);
4197 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4198 }
4199 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4200 MaskVals.size() == NumElts);
4201
4202 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4203 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4204 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4205 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4206 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4207 }
4208
4209 // Cap the cost at a value linear to the number of elements in the vector.
4210 // The default lowering is to use the stack. The vector store + scalar loads
4211 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4212 // being (at least) linear in LMUL. As a result, using the vslidedown
4213 // lowering for every element ends up being VL*LMUL.
4214 // TODO: Should we be directly costing the stack alternative? Doing so might
4215 // give us a more accurate upper bound.
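// For example, an 8-element build_vector at LMUL=4 (PerSlideCost = 4) has
// a budget of 16, so the slide sequence is only used when at most four
// slide operations are needed; otherwise we give up here and let the
// default (stack) lowering handle it.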
4216 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4217
4218 // TODO: unify with TTI getSlideCost.
4219 InstructionCost PerSlideCost = 1;
4220 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4221 default: break;
4222 case RISCVII::VLMUL::LMUL_2:
4223 PerSlideCost = 2;
4224 break;
4225 case RISCVII::VLMUL::LMUL_4:
4226 PerSlideCost = 4;
4227 break;
4228 case RISCVII::VLMUL::LMUL_8:
4229 PerSlideCost = 8;
4230 break;
4231 }
4232
4233 // TODO: Should we be using the build instseq then cost + evaluate scheme
4234 // we use for integer constants here?
4235 unsigned UndefCount = 0;
4236 for (const SDValue &V : Op->ops()) {
4237 if (V.isUndef()) {
4238 UndefCount++;
4239 continue;
4240 }
4241 if (UndefCount) {
4242 LinearBudget -= PerSlideCost;
4243 UndefCount = 0;
4244 }
4245 LinearBudget -= PerSlideCost;
4246 }
4247 if (UndefCount) {
4248 LinearBudget -= PerSlideCost;
4249 }
4250
4251 if (LinearBudget < 0)
4252 return SDValue();
4253
4254 assert((!VT.isFloatingPoint() ||
4255 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4256 "Illegal type which will result in reserved encoding");
4257
4258 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4259
4260 SDValue Vec;
4261 UndefCount = 0;
4262 for (SDValue V : Op->ops()) {
4263 if (V.isUndef()) {
4264 UndefCount++;
4265 continue;
4266 }
4267
4268 // Start our sequence with a TA splat in the hopes that hardware is able to
4269 // recognize there's no dependency on the prior value of our temporary
4270 // register.
4271 if (!Vec) {
4272 Vec = DAG.getSplatVector(VT, DL, V);
4273 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4274 UndefCount = 0;
4275 continue;
4276 }
4277
4278 if (UndefCount) {
4279 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4280 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4281 Vec, Offset, Mask, VL, Policy);
4282 UndefCount = 0;
4283 }
4284 auto OpCode =
4285 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4286 if (!VT.isFloatingPoint())
4287 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4288 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4289 V, Mask, VL);
4290 }
4291 if (UndefCount) {
4292 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4293 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4294 Vec, Offset, Mask, VL, Policy);
4295 }
4296 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4297}
4298
4299static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4301 SelectionDAG &DAG) {
4302 if (!Passthru)
4303 Passthru = DAG.getUNDEF(VT);
4304 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4305 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4306 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4307 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4308 // node in order to try and match RVV vector/scalar instructions.
4309 if ((LoC >> 31) == HiC)
4310 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4311
4312 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4313 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4314 // vlmax vsetvli or vsetivli to change the VL.
4315 // FIXME: Support larger constants?
4316 // FIXME: Support non-constant VLs by saturating?
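// For example, splatting the i64 constant 0x1234567812345678 on RV32 can
// be done as a vmv.v.x of 0x12345678 over a vector with twice as many i32
// elements (using vlmax or a doubled VL), followed by a bitcast back to
// the i64 vector type.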
4317 if (LoC == HiC) {
4318 SDValue NewVL;
4319 if (isAllOnesConstant(VL) ||
4320 (isa<RegisterSDNode>(VL) &&
4321 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4322 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4323 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4324 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4325
4326 if (NewVL) {
4327 MVT InterVT =
4328 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4329 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4330 DAG.getUNDEF(InterVT), Lo, NewVL);
4331 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4332 }
4333 }
4334 }
4335
4336 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4337 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4338 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4339 Hi.getConstantOperandVal(1) == 31)
4340 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4341
4342 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4343 // even if it might be sign extended.
4344 if (Hi.isUndef())
4345 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4346
4347 // Fall back to a stack store and stride x0 vector load.
4348 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4349 Hi, VL);
4350}
4351
4352// Called by type legalization to handle splat of i64 on RV32.
4353// FIXME: We can optimize this when the type has sign or zero bits in one
4354// of the halves.
4355static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4356 SDValue Scalar, SDValue VL,
4357 SelectionDAG &DAG) {
4358 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4359 SDValue Lo, Hi;
4360 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4361 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4362}
4363
4364// This function lowers a splat of a scalar operand Splat with the vector
4365// length VL. It ensures the final sequence is type legal, which is useful when
4366// lowering a splat after type legalization.
4367static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4368 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4369 const RISCVSubtarget &Subtarget) {
4370 bool HasPassthru = Passthru && !Passthru.isUndef();
4371 if (!HasPassthru && !Passthru)
4372 Passthru = DAG.getUNDEF(VT);
4373
4374 MVT EltVT = VT.getVectorElementType();
4375 MVT XLenVT = Subtarget.getXLenVT();
4376
4377 if (VT.isFloatingPoint()) {
4378 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4379 EltVT == MVT::bf16) {
4380 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4381 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4382 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4383 else
4384 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4385 MVT IVT = VT.changeVectorElementType(MVT::i16);
4386 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4387 SDValue Splat =
4388 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4389 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4390 }
4391 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4392 }
4393
4394 // Simplest case is that the operand needs to be promoted to XLenVT.
4395 if (Scalar.getValueType().bitsLE(XLenVT)) {
4396 // If the operand is a constant, sign extend to increase our chances
4397 // of being able to use a .vi instruction. ANY_EXTEND would become a
4398 // zero extend and the simm5 check in isel would fail.
4399 // FIXME: Should we ignore the upper bits in isel instead?
4400 unsigned ExtOpc =
4401 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4402 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4403 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4404 }
4405
4406 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4407 "Unexpected scalar for splat lowering!");
4408
4409 if (isOneConstant(VL) && isNullConstant(Scalar))
4410 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4411 DAG.getConstant(0, DL, XLenVT), VL);
4412
4413 // Otherwise use the more complicated splatting algorithm.
4414 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4415}
4416
4417// This function lowers an insert of a scalar operand Scalar into lane
4418// 0 of the vector regardless of the value of VL. The contents of the
4419// remaining lanes of the result vector are unspecified. VL is assumed
4420// to be non-zero.
4421 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4422 const SDLoc &DL, SelectionDAG &DAG,
4423 const RISCVSubtarget &Subtarget) {
4424 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4425
4426 const MVT XLenVT = Subtarget.getXLenVT();
4427 SDValue Passthru = DAG.getUNDEF(VT);
4428
4429 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4430 isNullConstant(Scalar.getOperand(1))) {
4431 SDValue ExtractedVal = Scalar.getOperand(0);
4432 // The element types must be the same.
4433 if (ExtractedVal.getValueType().getVectorElementType() ==
4434 VT.getVectorElementType()) {
4435 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4436 MVT ExtractedContainerVT = ExtractedVT;
4437 if (ExtractedContainerVT.isFixedLengthVector()) {
4438 ExtractedContainerVT = getContainerForFixedLengthVector(
4439 DAG, ExtractedContainerVT, Subtarget);
4440 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4441 ExtractedVal, DAG, Subtarget);
4442 }
4443 if (ExtractedContainerVT.bitsLE(VT))
4444 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4445 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4446 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4447 DAG.getVectorIdxConstant(0, DL));
4448 }
4449 }
4450
4451
4452 if (VT.isFloatingPoint())
4453 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4454 DAG.getUNDEF(VT), Scalar, VL);
4455
4456 // Avoid the tricky legalization cases by falling back to using the
4457 // splat code which already handles it gracefully.
4458 if (!Scalar.getValueType().bitsLE(XLenVT))
4459 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4460 DAG.getConstant(1, DL, XLenVT),
4461 VT, DL, DAG, Subtarget);
4462
4463 // If the operand is a constant, sign extend to increase our chances
4464 // of being able to use a .vi instruction. ANY_EXTEND would become a
4465 // zero extend and the simm5 check in isel would fail.
4466 // FIXME: Should we ignore the upper bits in isel instead?
4467 unsigned ExtOpc =
4468 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4469 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4470 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4471 VL);
4472}
4473
4474// Can this shuffle be performed on exactly one (possibly larger) input?
4475static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
4476 SDValue V2) {
4477
4478 if (V2.isUndef() &&
4480 return V1;
4481
4482 // Both inputs must be extracts.
4483 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4484 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4485 return SDValue();
4486
4487 // Extracting from the same source.
4488 SDValue Src = V1.getOperand(0);
4489 if (Src != V2.getOperand(0))
4490 return SDValue();
4491
4492 // Src needs to have twice the number of elements.
4493 unsigned NumElts = VT.getVectorNumElements();
4494 if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
4495 return SDValue();
4496
4497 // The extracts must extract the two halves of the source.
4498 if (V1.getConstantOperandVal(1) != 0 ||
4499 V2.getConstantOperandVal(1) != NumElts)
4500 return SDValue();
4501
4502 return Src;
4503}
4504
4505/// Is this shuffle interleaving contiguous elements from one vector into the
4506/// even elements and contiguous elements from another vector into the odd
4507/// elements. \p EvenSrc will contain the element that should be in the first
4508/// even element. \p OddSrc will contain the element that should be in the first
4509/// odd element. These can be the first element in a source or the element half
4510/// way through the source.
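/// For example, with 8 result elements the mask <0, 8, 1, 9, 2, 10, 3, 11>
/// gives EvenSrc = 0 and OddSrc = 8: even result elements come from the low
/// half of the first source and odd elements from the low half of the
/// second source.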
4511static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4512 int &OddSrc, const RISCVSubtarget &Subtarget) {
4513 // We need to be able to widen elements to the next larger integer type.
4514 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4515 return false;
4516
4517 int Size = Mask.size();
4518 int NumElts = VT.getVectorNumElements();
4519 assert(Size == (int)NumElts && "Unexpected mask size");
4520
4521 SmallVector<unsigned, 2> StartIndexes;
4522 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4523 return false;
4524
4525 EvenSrc = StartIndexes[0];
4526 OddSrc = StartIndexes[1];
4527
4528 // One source should be low half of first vector.
4529 if (EvenSrc != 0 && OddSrc != 0)
4530 return false;
4531
4532 // Subvectors will be extracted either at the start of the two input
4533 // vectors, or at the start and middle of the first vector if it's a unary
4534 // interleave.
4535 // In both cases, HalfNumElts will be extracted.
4536 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4537 // we'll create an illegal extract_subvector.
4538 // FIXME: We could support other values using a slidedown first.
4539 int HalfNumElts = NumElts / 2;
4540 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4541}
4542
4543/// Match shuffles that concatenate two vectors, rotate the concatenation,
4544/// and then extract the original number of elements from the rotated result.
4545/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4546/// returned rotation amount is for a rotate right, where elements move from
4547/// higher elements to lower elements. \p LoSrc indicates the first source
4548/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4549/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4550/// 0 or 1 if a rotation is found.
4551///
4552/// NOTE: We talk about rotate to the right which matches how bit shift and
4553/// rotate instructions are described where LSBs are on the right, but LLVM IR
4554/// and the table below write vectors with the lowest elements on the left.
4555static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4556 int Size = Mask.size();
4557
4558 // We need to detect various ways of spelling a rotation:
4559 // [11, 12, 13, 14, 15, 0, 1, 2]
4560 // [-1, 12, 13, 14, -1, -1, 1, -1]
4561 // [-1, -1, -1, -1, -1, -1, 1, 2]
4562 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4563 // [-1, 4, 5, 6, -1, -1, 9, -1]
4564 // [-1, 4, 5, 6, -1, -1, -1, -1]
4565 int Rotation = 0;
4566 LoSrc = -1;
4567 HiSrc = -1;
4568 for (int i = 0; i != Size; ++i) {
4569 int M = Mask[i];
4570 if (M < 0)
4571 continue;
4572
4573 // Determine where a rotated vector would have started.
4574 int StartIdx = i - (M % Size);
4575 // The identity rotation isn't interesting, stop.
4576 if (StartIdx == 0)
4577 return -1;
4578
4579 // If we found the tail of a vector the rotation must be the missing
4580 // front. If we found the head of a vector, it must be how much of the
4581 // head.
4582 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4583
4584 if (Rotation == 0)
4585 Rotation = CandidateRotation;
4586 else if (Rotation != CandidateRotation)
4587 // The rotations don't match, so we can't match this mask.
4588 return -1;
4589
4590 // Compute which value this mask is pointing at.
4591 int MaskSrc = M < Size ? 0 : 1;
4592
4593 // Compute which of the two target values this index should be assigned to.
4594 // This reflects whether the high elements are remaining or the low elements
4595 // are remaining.
4596 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4597
4598 // Either set up this value if we've not encountered it before, or check
4599 // that it remains consistent.
4600 if (TargetSrc < 0)
4601 TargetSrc = MaskSrc;
4602 else if (TargetSrc != MaskSrc)
4603 // This may be a rotation, but it pulls from the inputs in some
4604 // unsupported interleaving.
4605 return -1;
4606 }
4607
4608 // Check that we successfully analyzed the mask, and normalize the results.
4609 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4610 assert((LoSrc >= 0 || HiSrc >= 0) &&
4611 "Failed to find a rotated input vector!");
4612
4613 return Rotation;
4614}
4615
4616// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4617// 2, 4, 8 and the integer type Factor-times larger than VT's
4618// element type must be a legal element type.
4619// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4620// -> [p, q, r, s] (Factor=2, Index=1)
4622 SDValue Src, unsigned Factor,
4623 unsigned Index, SelectionDAG &DAG) {
4624 unsigned EltBits = VT.getScalarSizeInBits();
4625 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4626 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4627 SrcEC.divideCoefficientBy(Factor));
4628 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4629 SrcEC.divideCoefficientBy(Factor));
4630 Src = DAG.getBitcast(WideSrcVT, Src);
4631
4632 unsigned Shift = Index * EltBits;
4633 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4634 DAG.getConstant(Shift, DL, WideSrcVT));
4635 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4636 MVT IntVT = VT.changeVectorElementTypeToInteger();
4637 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
4638 DAG.getVectorIdxConstant(0, DL));
4639 return DAG.getBitcast(VT, Res);
4640}
4641
4642// Lower the following shuffle to vslidedown.
4643// a)
4644// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4645// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4646// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4647// b)
4648// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4649// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4650// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4651// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4652// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4653// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4654 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4655 SDValue V1, SDValue V2,
4656 ArrayRef<int> Mask,
4657 const RISCVSubtarget &Subtarget,
4658 SelectionDAG &DAG) {
4659 auto findNonEXTRACT_SUBVECTORParent =
4660 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4661 uint64_t Offset = 0;
4662 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4663 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4664 // a scalable vector. But we don't want to match the case.
4665 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4666 Offset += Parent.getConstantOperandVal(1);
4667 Parent = Parent.getOperand(0);
4668 }
4669 return std::make_pair(Parent, Offset);
4670 };
4671
4672 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4673 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4674
4675 // Extracting from the same source.
4676 SDValue Src = V1Src;
4677 if (Src != V2Src)
4678 return SDValue();
4679
4680 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4681 SmallVector<int, 16> NewMask(Mask);
4682 for (size_t i = 0; i != NewMask.size(); ++i) {
4683 if (NewMask[i] == -1)
4684 continue;
4685
4686 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4687 NewMask[i] = NewMask[i] + V1IndexOffset;
4688 } else {
4689 // Minus NewMask.size() is needed. Otherwise, the b case would be
4690 // <5,6,7,12> instead of <5,6,7,8>.
4691 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4692 }
4693 }
4694
4695 // First index must be known and non-zero. It will be used as the slidedown
4696 // amount.
4697 if (NewMask[0] <= 0)
4698 return SDValue();
4699
4700 // NewMask must also be contiguous (consecutive indices).
4701 for (unsigned i = 1; i != NewMask.size(); ++i)
4702 if (NewMask[i - 1] + 1 != NewMask[i])
4703 return SDValue();
4704
4705 MVT XLenVT = Subtarget.getXLenVT();
4706 MVT SrcVT = Src.getSimpleValueType();
4707 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4708 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4709 SDValue Slidedown =
4710 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4711 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4712 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4713 return DAG.getNode(
4714 ISD::EXTRACT_SUBVECTOR, DL, VT,
4715 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4716 DAG.getConstant(0, DL, XLenVT));
4717}
4718
4719// Because vslideup leaves the destination elements at the start intact, we can
4720// use it to perform shuffles that insert subvectors:
4721//
4722// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4723// ->
4724// vsetvli zero, 8, e8, mf2, ta, ma
4725// vslideup.vi v8, v9, 4
4726//
4727// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4728// ->
4729// vsetvli zero, 5, e8, mf2, tu, ma
4730 // vslideup.vi v8, v9, 2
4731 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4732 SDValue V1, SDValue V2,
4733 ArrayRef<int> Mask,
4734 const RISCVSubtarget &Subtarget,
4735 SelectionDAG &DAG) {
4736 unsigned NumElts = VT.getVectorNumElements();
4737 int NumSubElts, Index;
4738 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4739 Index))
4740 return SDValue();
4741
4742 bool OpsSwapped = Mask[Index] < (int)NumElts;
4743 SDValue InPlace = OpsSwapped ? V2 : V1;
4744 SDValue ToInsert = OpsSwapped ? V1 : V2;
4745
4746 MVT XLenVT = Subtarget.getXLenVT();
4747 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4748 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4749 // We slide up by the index that the subvector is being inserted at, and set
4750 // VL to the index + the number of elements being inserted.
4751 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4752 // If we're adding a suffix to the in place vector, i.e. inserting right
4753 // up to the very end of it, then we don't actually care about the tail.
4754 if (NumSubElts + Index >= (int)NumElts)
4755 Policy |= RISCVII::TAIL_AGNOSTIC;
4756
4757 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4758 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4759 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4760
4761 SDValue Res;
4762 // If we're inserting into the lowest elements, use a tail undisturbed
4763 // vmv.v.v.
4764 if (Index == 0)
4765 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4766 VL);
4767 else
4768 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4769 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4770 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4771}
4772
4773/// Match v(f)slide1up/down idioms. These operations involve sliding
4774/// N-1 elements to make room for an inserted scalar at one end.
4775 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4776 SDValue V1, SDValue V2,
4777 ArrayRef<int> Mask,
4778 const RISCVSubtarget &Subtarget,
4779 SelectionDAG &DAG) {
4780 bool OpsSwapped = false;
4781 if (!isa<BuildVectorSDNode>(V1)) {
4782 if (!isa<BuildVectorSDNode>(V2))
4783 return SDValue();
4784 std::swap(V1, V2);
4785 OpsSwapped = true;
4786 }
4787 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4788 if (!Splat)
4789 return SDValue();
4790
4791 // Return true if the mask could describe a slide of Mask.size() - 1
4792 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4793 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4794 const unsigned S = (Offset > 0) ? 0 : -Offset;
4795 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4796 for (unsigned i = S; i != E; ++i)
4797 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4798 return false;
4799 return true;
4800 };
4801
4802 const unsigned NumElts = VT.getVectorNumElements();
4803 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4804 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4805 return SDValue();
4806
4807 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4808 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4809 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4810 return SDValue();
4811
4812 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4813 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4814
4815 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
4816 // vslide1{down,up}.vx instead.
4817 if (VT.getVectorElementType() == MVT::bf16 ||
4818 (VT.getVectorElementType() == MVT::f16 &&
4819 !Subtarget.hasVInstructionsF16())) {
4820 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
4821 Splat =
4822 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
4823 V2 = DAG.getBitcast(
4824 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
4825 SDValue Vec = DAG.getNode(
4826 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
4827 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
4828 Vec = DAG.getBitcast(ContainerVT, Vec);
4829 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4830 }
4831
4832 auto OpCode = IsVSlidedown ?
4833 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4834 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4835 if (!VT.isFloatingPoint())
4836 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4837 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4838 DAG.getUNDEF(ContainerVT),
4839 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4840 Splat, TrueMask, VL);
4841 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4842}
4843
4844// Match a mask which "spreads" the leading elements of a vector evenly
4845// across the result. Factor is the spread amount, and Index is the
4846// offset applied. (on success, Index < Factor) This is the inverse
4847// of a deinterleave with the same Factor and Index. This is analogous
4848// to an interleave, except that all but one lane is undef.
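// For example, <-1, 0, -1, 1, -1, 2, -1, 3> is a spread with Factor = 2 and
// Index = 1, and <0, -1, -1, -1, 1, -1, -1, -1> is a spread with Factor = 4
// and Index = 0.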
4849static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
4850 SmallVector<bool> LaneIsUndef(Factor, true);
4851 for (unsigned i = 0; i < Mask.size(); i++)
4852 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4853
4854 bool Found = false;
4855 for (unsigned i = 0; i < Factor; i++) {
4856 if (LaneIsUndef[i])
4857 continue;
4858 if (Found)
4859 return false;
4860 Index = i;
4861 Found = true;
4862 }
4863 if (!Found)
4864 return false;
4865
4866 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4867 unsigned j = i * Factor + Index;
4868 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4869 return false;
4870 }
4871 return true;
4872}
4873
4874// Given a vector a, b, c, d return a vector Factor times longer
4875// with Factor-1 undef's between elements. Ex:
4876// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4877// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4878static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4879 const SDLoc &DL, SelectionDAG &DAG) {
4880
4881 MVT VT = V.getSimpleValueType();
4882 unsigned EltBits = VT.getScalarSizeInBits();
4883 ElementCount EC = VT.getVectorElementCount();
4884 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4885
4886 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4887
4888 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4889 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4890 // allow the SHL to fold away if Index is 0.
4891 if (Index != 0)
4892 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4893 DAG.getConstant(EltBits * Index, DL, WideVT));
4894 // Make sure to use original element type
4895 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4896 EC.multiplyCoefficientBy(Factor));
4897 return DAG.getBitcast(ResultVT, Result);
4898}
4899
4900// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4901// to create an interleaved vector of <[vscale x] n*2 x ty>.
4902// This requires that the size of ty is less than the subtarget's maximum ELEN.
4903 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4904 const SDLoc &DL, SelectionDAG &DAG,
4905 const RISCVSubtarget &Subtarget) {
4906
4907 // FIXME: Not only does this optimize the code, it fixes some correctness
4908 // issues because MIR does not have freeze.
4909 if (EvenV.isUndef())
4910 return getWideningSpread(OddV, 2, 1, DL, DAG);
4911 if (OddV.isUndef())
4912 return getWideningSpread(EvenV, 2, 0, DL, DAG);
4913
4914 MVT VecVT = EvenV.getSimpleValueType();
4915 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4916 // Convert fixed vectors to scalable if needed
4917 if (VecContainerVT.isFixedLengthVector()) {
4918 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4919 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4920 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4921 }
4922
4923 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4924
4925 // We're working with a vector of the same size as the resulting
4926 // interleaved vector, but with half the number of elements and
4927 // twice the SEW (Hence the restriction on not using the maximum
4928 // ELEN)
4929 MVT WideVT =
4930 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4931 VecVT.getVectorElementCount());
4932 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4933 if (WideContainerVT.isFixedLengthVector())
4934 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4935
4936 // Bitcast the input vectors to integers in case they are FP
4937 VecContainerVT = VecContainerVT.changeTypeToInteger();
4938 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4939 OddV = DAG.getBitcast(VecContainerVT, OddV);
4940
4941 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4942 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4943
4944 SDValue Interleaved;
4945 if (Subtarget.hasStdExtZvbb()) {
4946 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4947 SDValue OffsetVec =
4948 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4949 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4950 OffsetVec, Passthru, Mask, VL);
4951 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4952 Interleaved, EvenV, Passthru, Mask, VL);
4953 } else {
4954 // FIXME: We should freeze the odd vector here. We already handled the case
4955 // of provably undef/poison above.
4956
4957 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4958 // vwaddu.vv
4959 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4960 OddV, Passthru, Mask, VL);
4961
4962 // Then multiply OddV by 2^VecVT.getScalarSizeInBits() - 1 (i.e. all ones)
4963 SDValue AllOnesVec = DAG.getSplatVector(
4964 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4965 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4966 OddV, AllOnesVec, Passthru, Mask, VL);
4967
4968 // Add the two together so we get
4969 // (OddV * 0xff...ff) + (OddV + EvenV)
4970 // = (OddV * 0x100...00) + EvenV
4971 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4972 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4973 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4974 Interleaved, OddsMul, Passthru, Mask, VL);
4975 }
4976
4977 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4978 MVT ResultContainerVT = MVT::getVectorVT(
4979 VecVT.getVectorElementType(), // Make sure to use original type
4980 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4981 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
4982
4983 // Convert back to a fixed vector if needed
4984 MVT ResultVT =
4985 MVT::getVectorVT(VecVT.getVectorElementType(),
4986 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
4987 if (ResultVT.isFixedLengthVector())
4988 Interleaved =
4989 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
4990
4991 return Interleaved;
4992}
4993
4994// If we have a vector of bits that we want to reverse, we can use a vbrev on a
4995// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
4996 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
4997 SelectionDAG &DAG,
4998 const RISCVSubtarget &Subtarget) {
4999 SDLoc DL(SVN);
5000 MVT VT = SVN->getSimpleValueType(0);
5001 SDValue V = SVN->getOperand(0);
5002 unsigned NumElts = VT.getVectorNumElements();
5003
5004 assert(VT.getVectorElementType() == MVT::i1);
5005
5006 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5007 SVN->getMask().size()) ||
5008 !SVN->getOperand(1).isUndef())
5009 return SDValue();
5010
5011 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5012 EVT ViaVT = EVT::getVectorVT(
5013 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5014 EVT ViaBitVT =
5015 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5016
5017 // If we don't have zvbb or the larger element type > ELEN, the operation will
5018 // be illegal.
5019 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5020 ViaVT) ||
5021 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5022 return SDValue();
5023
5024 // If the bit vector doesn't fit exactly into the larger element type, we need
5025 // to insert it into the larger vector and then shift up the reversed bits
5026 // afterwards to get rid of the gap introduced.
5027 if (ViaEltSize > NumElts)
5028 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
5029 V, DAG.getVectorIdxConstant(0, DL));
5030
5031 SDValue Res =
5032 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5033
5034 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5035 // element type.
5036 if (ViaEltSize > NumElts)
5037 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5038 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5039
5040 Res = DAG.getBitcast(ViaBitVT, Res);
5041
5042 if (ViaEltSize > NumElts)
5043 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
5044 DAG.getVectorIdxConstant(0, DL));
5045 return Res;
5046}
5047
5049 SelectionDAG &DAG,
5050 const RISCVSubtarget &Subtarget,
5051 MVT &RotateVT, unsigned &RotateAmt) {
5052 SDLoc DL(SVN);
5053
5054 EVT VT = SVN->getValueType(0);
5055 unsigned NumElts = VT.getVectorNumElements();
5056 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5057 unsigned NumSubElts;
5058 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
5059 NumElts, NumSubElts, RotateAmt))
5060 return false;
5061 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5062 NumElts / NumSubElts);
5063
5064 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5065 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5066}
5067
5068// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5069// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5070// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5071 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5072 SelectionDAG &DAG,
5073 const RISCVSubtarget &Subtarget) {
5074 SDLoc DL(SVN);
5075
5076 EVT VT = SVN->getValueType(0);
5077 unsigned RotateAmt;
5078 MVT RotateVT;
5079 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5080 return SDValue();
5081
5082 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5083
5084 SDValue Rotate;
5085 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5086 // so canonicalize to vrev8.
5087 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5088 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5089 else
5090 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5091 DAG.getConstant(RotateAmt, DL, RotateVT));
5092
5093 return DAG.getBitcast(VT, Rotate);
5094}
5095
5096// If compiling with an exactly known VLEN, see if we can split a
5097// shuffle on m2 or larger into a small number of m1 sized shuffles
5098 // which write each destination register exactly once.
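// For example, with VLEN=128 a v8i64 shuffle (an m4 operation) in which
// every destination register only reads from a single source register can
// be lowered as at most four v2i64 (m1) shuffles, one per destination
// register.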
5099 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5100 SelectionDAG &DAG,
5101 const RISCVSubtarget &Subtarget) {
5102 SDLoc DL(SVN);
5103 MVT VT = SVN->getSimpleValueType(0);
5104 SDValue V1 = SVN->getOperand(0);
5105 SDValue V2 = SVN->getOperand(1);
5106 ArrayRef<int> Mask = SVN->getMask();
5107 unsigned NumElts = VT.getVectorNumElements();
5108
5109 // If we don't know exact data layout, not much we can do. If this
5110 // is already m1 or smaller, no point in splitting further.
5111 const auto VLen = Subtarget.getRealVLen();
5112 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5113 return SDValue();
5114
5115 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5116 // expansion for.
5117 unsigned RotateAmt;
5118 MVT RotateVT;
5119 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5120 return SDValue();
5121
5122 MVT ElemVT = VT.getVectorElementType();
5123 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5124 unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
5125
5126 SmallVector<std::pair<int, SmallVector<int>>>
5127 OutMasks(VRegsPerSrc, {-1, {}});
5128
5129 // Check if our mask can be done as a 1-to-1 mapping from source
5130 // to destination registers in the group without needing to
5131 // write each destination more than once.
5132 for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
5133 int DstVecIdx = DstIdx / ElemsPerVReg;
5134 int DstSubIdx = DstIdx % ElemsPerVReg;
5135 int SrcIdx = Mask[DstIdx];
5136 if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
5137 continue;
5138 int SrcVecIdx = SrcIdx / ElemsPerVReg;
5139 int SrcSubIdx = SrcIdx % ElemsPerVReg;
5140 if (OutMasks[DstVecIdx].first == -1)
5141 OutMasks[DstVecIdx].first = SrcVecIdx;
5142 if (OutMasks[DstVecIdx].first != SrcVecIdx)
5143 // Note: This case could easily be handled by keeping track of a chain
5144 // of source values and generating two element shuffles below. This is
5145 // less an implementation question, and more a profitability one.
5146 return SDValue();
5147
5148 OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
5149 OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
5150 }
5151
5152 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5153 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5154 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5155 assert(M1VT == getLMUL1VT(M1VT));
5156 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5157 SDValue Vec = DAG.getUNDEF(ContainerVT);
5158 // The following semantically builds up a fixed length concat_vector
5159 // of the component shuffle_vectors. We eagerly lower to scalable here
5160 // to avoid DAG combining it back to a large shuffle_vector again.
5161 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5162 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5163 for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
5164 auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
5165 if (SrcVecIdx == -1)
5166 continue;
5167 unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
5168 SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
5169 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5170 DAG.getVectorIdxConstant(ExtractIdx, DL));
5171 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5172 SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
5173 SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
5174 unsigned InsertIdx = DstVecIdx * NumOpElts;
5175 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
5176 DAG.getVectorIdxConstant(InsertIdx, DL));
5177 }
5178 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5179}
5180
5181// Matches a subset of compress masks with a contiguous prefix of output
5182// elements. This could be extended to allow gaps by deciding which
5183// source elements to spuriously demand.
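// For example, <0, 2, 5, -1> is accepted (indices strictly increase, each
// index is >= its position, and undefs form only a trailing run), while
// <1, 0, 2, 3> and <-1, 0, 1, 2> are rejected.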
5184static bool isCompressMask(ArrayRef<int> Mask) {
5185  int Last = -1;
5186 bool SawUndef = false;
5187 for (unsigned i = 0; i < Mask.size(); i++) {
5188 if (Mask[i] == -1) {
5189 SawUndef = true;
5190 continue;
5191 }
5192 if (SawUndef)
5193 return false;
5194 if (i > (unsigned)Mask[i])
5195 return false;
5196 if (Mask[i] <= Last)
5197 return false;
5198 Last = Mask[i];
5199 }
5200 return true;
5201}
5202
5203/// Given a shuffle where the indices are disjoint between the two sources,
5204/// e.g.:
5205///
5206/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5207///
5208/// Merge the two sources into one and do a single source shuffle:
5209///
5210/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5211/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5212///
5213/// A vselect will either be merged into a masked instruction or be lowered as a
5214/// vmerge.vvm, which is cheaper than a vrgather.vv.
5215static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5216                                           SelectionDAG &DAG,
5217 const RISCVSubtarget &Subtarget) {
5218 MVT VT = SVN->getSimpleValueType(0);
5219 MVT XLenVT = Subtarget.getXLenVT();
5220 SDLoc DL(SVN);
5221
5222 const ArrayRef<int> Mask = SVN->getMask();
5223
5224 // Work out which source each lane will come from.
5225 SmallVector<int, 16> Srcs(Mask.size(), -1);
5226
5227 for (int Idx : Mask) {
5228 if (Idx == -1)
5229 continue;
5230 unsigned SrcIdx = Idx % Mask.size();
5231 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5232 if (Srcs[SrcIdx] == -1)
5233 // Mark this source as using this lane.
5234 Srcs[SrcIdx] = Src;
5235 else if (Srcs[SrcIdx] != Src)
5236 // The other source is using this lane: not disjoint.
5237 return SDValue();
5238 }
5239
5240 SmallVector<SDValue> SelectMaskVals;
5241 for (int Lane : Srcs) {
5242 if (Lane == -1)
5243 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5244 else
5245 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5246 }
5247 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5248 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5249 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5250 SVN->getOperand(0), SVN->getOperand(1));
5251
5252 // Move all indices relative to the first source.
5253 SmallVector<int> NewMask(Mask.size());
5254 for (unsigned I = 0; I < Mask.size(); I++) {
5255 if (Mask[I] == -1)
5256 NewMask[I] = -1;
5257 else
5258 NewMask[I] = Mask[I] % Mask.size();
5259 }
5260
5261 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5262}
5263
5264/// Try to widen element type to get a new mask value for a better permutation
5265/// sequence. This doesn't try to inspect the widened mask for profitability;
5266/// we speculate the widened form is equal or better. This has the effect of
5267/// reducing mask constant sizes - allowing cheaper materialization sequences
5268/// - and index sequence sizes - reducing register pressure and materialization
5269/// cost, at the cost of (possibly) an extra VTYPE toggle.
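/// For example, a v8i16 shuffle with mask <0,1, 4,5, 6,7, 2,3> can instead be
/// performed as a v4i32 shuffle with mask <0, 2, 3, 1>, halving the number of
/// index values that have to be materialized.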
5270static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5271  SDLoc DL(Op);
5272 MVT VT = Op.getSimpleValueType();
5273 MVT ScalarVT = VT.getVectorElementType();
5274 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5275 SDValue V0 = Op.getOperand(0);
5276 SDValue V1 = Op.getOperand(1);
5277 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5278
5279 // Avoid wasted work leading to isTypeLegal check failing below
5280 if (ElementSize > 32)
5281 return SDValue();
5282
5283 SmallVector<int, 8> NewMask;
5284 if (!widenShuffleMaskElts(Mask, NewMask))
5285 return SDValue();
5286
5287 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5288 : MVT::getIntegerVT(ElementSize * 2);
5289 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5290 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5291 return SDValue();
5292 V0 = DAG.getBitcast(NewVT, V0);
5293 V1 = DAG.getBitcast(NewVT, V1);
5294 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5295}
5296
5297static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5298                                   const RISCVSubtarget &Subtarget) {
5299 SDValue V1 = Op.getOperand(0);
5300 SDValue V2 = Op.getOperand(1);
5301 SDLoc DL(Op);
5302 MVT XLenVT = Subtarget.getXLenVT();
5303 MVT VT = Op.getSimpleValueType();
5304 unsigned NumElts = VT.getVectorNumElements();
5305 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5306
5307 if (VT.getVectorElementType() == MVT::i1) {
5308 // Lower to a vror.vi of a larger element type if possible before we promote
5309 // i1s to i8s.
5310 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5311 return V;
5312 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5313 return V;
5314
5315 // Promote i1 shuffle to i8 shuffle.
5316 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5317 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5318 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5319 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5320 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5321 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5322 ISD::SETNE);
5323 }
5324
5325 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5326
5327 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5328
5329 if (SVN->isSplat()) {
5330 const int Lane = SVN->getSplatIndex();
5331 if (Lane >= 0) {
5332 MVT SVT = VT.getVectorElementType();
5333
5334 // Turn splatted vector load into a strided load with an X0 stride.
5335 SDValue V = V1;
5336 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5337 // with undef.
5338 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5339 int Offset = Lane;
5340 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5341 int OpElements =
5342 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5343 V = V.getOperand(Offset / OpElements);
5344 Offset %= OpElements;
5345 }
5346
5347 // We need to ensure the load isn't atomic or volatile.
5348 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5349 auto *Ld = cast<LoadSDNode>(V);
5350 Offset *= SVT.getStoreSize();
5351 SDValue NewAddr = DAG.getMemBasePlusOffset(
5352 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5353
5354 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5355 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5356 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5357 SDValue IntID =
5358 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5359 SDValue Ops[] = {Ld->getChain(),
5360 IntID,
5361 DAG.getUNDEF(ContainerVT),
5362 NewAddr,
5363 DAG.getRegister(RISCV::X0, XLenVT),
5364 VL};
5365 SDValue NewLoad = DAG.getMemIntrinsicNode(
5366 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5367            DAG.getMachineFunction().getMachineMemOperand(
5368                Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5369 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5370 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5371 }
5372
5373 MVT SplatVT = ContainerVT;
5374
5375 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5376 if (SVT == MVT::bf16 ||
5377 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5378 SVT = MVT::i16;
5379 SplatVT = ContainerVT.changeVectorElementType(SVT);
5380 }
5381
5382 // Otherwise use a scalar load and splat. This will give the best
5383 // opportunity to fold a splat into the operation. ISel can turn it into
5384 // the x0 strided load if we aren't able to fold away the select.
5385 if (SVT.isFloatingPoint())
5386 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5387 Ld->getPointerInfo().getWithOffset(Offset),
5388 Ld->getOriginalAlign(),
5389 Ld->getMemOperand()->getFlags());
5390 else
5391 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5392 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5393 Ld->getOriginalAlign(),
5394 Ld->getMemOperand()->getFlags());
5395        DAG.makeEquivalentMemoryOrdering(Ld, V);
5396
5397 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5398                                               : RISCVISD::VMV_V_X_VL;
5399      SDValue Splat =
5400 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5401 Splat = DAG.getBitcast(ContainerVT, Splat);
5402 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5403 }
5404
5405 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5406 assert(Lane < (int)NumElts && "Unexpected lane!");
5407 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5408 V1, DAG.getConstant(Lane, DL, XLenVT),
5409 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5410 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5411 }
5412 }
5413
5414 // For exact VLEN m2 or greater, try to split to m1 operations if we
5415 // can split cleanly.
5416 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5417 return V;
5418
5419 ArrayRef<int> Mask = SVN->getMask();
5420
5421 if (SDValue V =
5422 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5423 return V;
5424
5425 if (SDValue V =
5426 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5427 return V;
5428
5429 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5430 // available.
5431 if (Subtarget.hasStdExtZvkb())
5432 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5433 return V;
5434
5435 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5436 // be undef which can be handled with a single SLIDEDOWN/UP.
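  // For example, the v8 mask <6,7,8,9,10,11,12,13> is a rotation by 6 of the
  // concatenation of V1 and V2: V1 is slid down by 6 to fill lanes 0-1 and V2
  // is slid up by 2 to fill lanes 2-7.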
5437 int LoSrc, HiSrc;
5438 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5439 if (Rotation > 0) {
5440 SDValue LoV, HiV;
5441 if (LoSrc >= 0) {
5442 LoV = LoSrc == 0 ? V1 : V2;
5443 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5444 }
5445 if (HiSrc >= 0) {
5446 HiV = HiSrc == 0 ? V1 : V2;
5447 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5448 }
5449
5450 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5451 // to slide LoV up by (NumElts - Rotation).
5452 unsigned InvRotate = NumElts - Rotation;
5453
5454 SDValue Res = DAG.getUNDEF(ContainerVT);
5455 if (HiV) {
5456      // Even though we could use a smaller VL, don't do so, in order to avoid a
5457      // vsetivli toggle.
5458 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5459 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5460 }
5461 if (LoV)
5462 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5463 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5464                        RISCVII::TAIL_AGNOSTIC);
5465
5466 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5467 }
5468
5469 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5470 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5471
5472 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5473 // use shift and truncate to perform the shuffle.
5474 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5475 // shift-and-trunc reducing total cost for everything except an mf8 result.
5476 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5477 // to do the entire operation.
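  // For example, taking the even bytes of a v16i8 source (Factor=2, Index=0)
  // can be done by reinterpreting it as v8i16 and truncating each element back
  // to i8; the odd bytes (Index=1) additionally need a logical right shift by
  // 8 before the truncate.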
5478 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5479 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5480 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5481 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5482 unsigned Index = 0;
5483 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5484 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5485 if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5486 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5487 }
5488 }
5489 }
5490
5491 if (SDValue V =
5492 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5493 return V;
5494
5495 // Detect an interleave shuffle and lower to
5496 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
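  // The math: the widening add produces Even + Odd in the wide element type
  // and the multiply-accumulate adds Odd * (2^eltbits - 1), so each wide
  // element ends up as Even + (Odd << eltbits), i.e. the two narrow elements
  // interleaved.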
5497 int EvenSrc, OddSrc;
5498 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5499 // Extract the halves of the vectors.
5500 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5501
5502 // Recognize if one half is actually undef; the matching above will
5503 // otherwise reuse the even stream for the undef one. This improves
5504 // spread(2) shuffles.
5505 bool LaneIsUndef[2] = { true, true};
5506 for (unsigned i = 0; i < Mask.size(); i++)
5507 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5508
5509 int Size = Mask.size();
5510 SDValue EvenV, OddV;
5511 if (LaneIsUndef[0]) {
5512 EvenV = DAG.getUNDEF(HalfVT);
5513 } else {
5514 assert(EvenSrc >= 0 && "Undef source?");
5515 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5516 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5517 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5518 }
5519
5520 if (LaneIsUndef[1]) {
5521 OddV = DAG.getUNDEF(HalfVT);
5522 } else {
5523 assert(OddSrc >= 0 && "Undef source?");
5524 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5525 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5526 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5527 }
5528
5529 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5530 }
5531
5532
5533 // Handle any remaining single source shuffles
5534 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5535 if (V2.isUndef()) {
5536 // We might be able to express the shuffle as a bitrotate. But even if we
5537 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5538 // shifts and a vor will have a higher throughput than a vrgather.
5539 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5540 return V;
5541
5542 // Before hitting generic lowering fallbacks, try to widen the mask
5543 // to a wider SEW.
5544 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5545 return V;
5546
5547 // Can we generate a vcompress instead of a vrgather? These scale better
5548 // at high LMUL, at the cost of not being able to fold a following select
5549 // into them. The mask constants are also smaller than the index vector
5550 // constants, and thus easier to materialize.
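    // For example, the shuffle mask <0, 2, 5, -1> on a v8 source becomes a
    // v8i1 compress mask with bits set at source positions 0, 2 and 5;
    // vcompress then packs those three elements into the low lanes of the
    // result.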
5551 if (isCompressMask(Mask)) {
5552 SmallVector<SDValue> MaskVals(NumElts,
5553 DAG.getConstant(false, DL, XLenVT));
5554 for (auto Idx : Mask) {
5555 if (Idx == -1)
5556 break;
5557 assert(Idx >= 0 && (unsigned)Idx < NumElts);
5558 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
5559 }
5560 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5561 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5562 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
5563 DAG.getUNDEF(VT));
5564 }
5565
5566 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
5567 // is fully covered in interleave(2) above, so it is ignored here.
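    // For example, a spread(4) mask on v8i8 looks like
    // <0,-1,-1,-1, 1,-1,-1,-1>: element i of the narrow source slice ends up
    // in lane 4*i.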
5568 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5569 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5570 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5571 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5572 unsigned Index;
5573 if (isSpreadMask(Mask, Factor, Index)) {
5574 MVT NarrowVT =
5575 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5576 SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
5577 DAG.getVectorIdxConstant(0, DL));
5578 return getWideningSpread(Src, Factor, Index, DL, DAG);
5579 }
5580 }
5581 }
5582
5583 if (VT.getScalarSizeInBits() == 8 &&
5584 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5585 // On such a vector we're unable to use i8 as the index type.
5586 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5587 // may involve vector splitting if we're already at LMUL=8, or our
5588 // user-supplied maximum fixed-length LMUL.
5589 return SDValue();
5590 }
5591
5592 // Base case for the two operand recursion below - handle the worst case
5593 // single source shuffle.
5594 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5595 MVT IndexVT = VT.changeTypeToInteger();
5596 // Since we can't introduce illegal index types at this stage, use i16 and
5597 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5598 // than XLenVT.
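  // This happens e.g. for vectors of i64/f64 elements on RV32, where the
  // natural index type would be i64.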
5599 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5600 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5601 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5602 }
5603
5604 // If the mask allows, we can do all the index computation in 16 bits. This
5605 // requires less work and less register pressure at high LMUL, and creates
5606 // smaller constants which may be cheaper to materialize.
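  // The size check means this only kicks in when the index vector would
  // otherwise need more than one vector register at the minimum VLEN.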
5607 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5608 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5609 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5610 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5611 }
5612
5613 MVT IndexContainerVT =
5614 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5615
5616 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5617 SmallVector<SDValue> GatherIndicesLHS;
5618 for (int MaskIndex : Mask) {
5619 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5620 GatherIndicesLHS.push_back(IsLHSIndex
5621 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5622 : DAG.getUNDEF(XLenVT));
5623 }
5624 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5625 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5626 Subtarget);
5627 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5628 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5629 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5630 }
5631
5632 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5633 // merged with a second vrgather.
5634 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5635
5636 // Now construct the mask that will be used by the blended vrgather operation.
5637 // Construct the appropriate indices into each vector.
5638 for (int MaskIndex : Mask) {
5639 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5640 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5641 ? MaskIndex : -1);
5642 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5643 }
5644
5645 // If the mask indices are disjoint between the two sources, we can lower it
5646 // as a vselect + a single source vrgather.vv. Don't do this if we think the
5647 // operands may end up being lowered to something cheaper than a vrgather.vv.
5648 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
5649 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS.data(), VT) &&
5650 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS.data(), VT) &&
5651 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
5652 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
5653 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
5654 return V;
5655
5656 // Before hitting generic lowering fallbacks, try to widen the mask
5657 // to a wider SEW.
5658 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5659 return V;
5660
5661 // Try to pick a profitable operand order.
5662 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5663 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5664
5665 // Recursively invoke lowering for each operand if we had two
5666 // independent single source shuffles, and then combine the result via a
5667 // vselect. Note that the vselect will likely be folded back into the
5668 // second permute (vrgather, or other) by the post-isel combine.
5669 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5670 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5671
5672 SmallVector<SDValue> MaskVals;
5673 for (int MaskIndex : Mask) {
5674 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5675 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5676 }
5677
5678 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5679 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5680 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5681
5682 if (SwapOps)
5683 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5684 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5685}
5686
5687bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5688  // Support splats for any type. These should type legalize well.
5689 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5690 return true;
5691
5692 // Only support legal VTs for other shuffles for now.
5693 if (!isTypeLegal(VT))
5694 return false;
5695
5696 MVT SVT = VT.getSimpleVT();
5697
5698 // Not for i1 vectors.
5699 if (SVT.getScalarType() == MVT::i1)
5700 return false;
5701
5702 int Dummy1, Dummy2;
5703 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5704 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5705}
5706
5707// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5708// the exponent.
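// Worked example for an i32 element using an f64 conversion: for x = 16,
// x & -x = 16 and (double)16 = 2^4, whose biased exponent field is
// 1023 + 4 = 1027. Shifting the bits right by 52 and subtracting the bias 1023
// gives cttz(16) = 4; for CTLZ the result is (1023 + 31) - 1027 = 27 leading
// zeros.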
5709SDValue
5710RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5711 SelectionDAG &DAG) const {
5712 MVT VT = Op.getSimpleValueType();
5713 unsigned EltSize = VT.getScalarSizeInBits();
5714 SDValue Src = Op.getOperand(0);
5715 SDLoc DL(Op);
5716 MVT ContainerVT = VT;
5717
5718 SDValue Mask, VL;
5719 if (Op->isVPOpcode()) {
5720 Mask = Op.getOperand(1);
5721 if (VT.isFixedLengthVector())
5722 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5723 Subtarget);
5724 VL = Op.getOperand(2);
5725 }
5726
5727  // We choose an FP type that can represent the value if possible. Otherwise,
5728  // we use a round-toward-zero conversion to get a correct exponent in the result.
5729 // TODO: Use f16 for i8 when possible?
5730 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5731 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5732 FloatEltVT = MVT::f32;
5733 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5734
5735 // Legal types should have been checked in the RISCVTargetLowering
5736 // constructor.
5737 // TODO: Splitting may make sense in some cases.
5738 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5739 "Expected legal float type!");
5740
5741 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5742 // The trailing zero count is equal to log2 of this single bit value.
5743 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5744 SDValue Neg = DAG.getNegative(Src, DL, VT);
5745 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5746 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5747 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5748 Src, Mask, VL);
5749 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5750 }
5751
5752 // We have a legal FP type, convert to it.
5753 SDValue FloatVal;
5754 if (FloatVT.bitsGT(VT)) {
5755 if (Op->isVPOpcode())
5756 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5757 else
5758 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5759 } else {
5760 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5761 if (VT.isFixedLengthVector()) {
5762 ContainerVT = getContainerForFixedLengthVector(VT);
5763 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5764 }
5765 if (!Op->isVPOpcode())
5766 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5767 SDValue RTZRM =
5768        DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5769    MVT ContainerFloatVT =
5770 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5771 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5772 Src, Mask, RTZRM, VL);
5773 if (VT.isFixedLengthVector())
5774 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5775 }
5776 // Bitcast to integer and shift the exponent to the LSB.
5777 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5778 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5779 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5780
5781 SDValue Exp;
5782 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5783 if (Op->isVPOpcode()) {
5784 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5785 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5786 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5787 } else {
5788 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5789 DAG.getConstant(ShiftAmt, DL, IntVT));
5790 if (IntVT.bitsLT(VT))
5791 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5792 else if (IntVT.bitsGT(VT))
5793 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5794 }
5795
5796 // The exponent contains log2 of the value in biased form.
5797 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5798 // For trailing zeros, we just need to subtract the bias.
5799 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5800 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5801 DAG.getConstant(ExponentBias, DL, VT));
5802 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5803 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5804 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5805
5806 // For leading zeros, we need to remove the bias and convert from log2 to
5807 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5808 unsigned Adjust = ExponentBias + (EltSize - 1);
5809 SDValue Res;
5810 if (Op->isVPOpcode())
5811 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5812 Mask, VL);
5813 else
5814 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5815
5816  // With a zero input, the result above equals Adjust, which is greater than
5817  // EltSize. Hence, we can use min(Res, EltSize) for CTLZ.
5818 if (Op.getOpcode() == ISD::CTLZ)
5819 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5820 else if (Op.getOpcode() == ISD::VP_CTLZ)
5821 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5822 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5823 return Res;
5824}
5825
5826SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5827 SelectionDAG &DAG) const {
5828 SDLoc DL(Op);
5829 MVT XLenVT = Subtarget.getXLenVT();
5830 SDValue Source = Op->getOperand(0);
5831 MVT SrcVT = Source.getSimpleValueType();
5832 SDValue Mask = Op->getOperand(1);
5833 SDValue EVL = Op->getOperand(2);
5834
5835 if (SrcVT.isFixedLengthVector()) {
5836 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5837 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5838 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5839 Subtarget);
5840 SrcVT = ContainerVT;
5841 }
5842
5843 // Convert to boolean vector.
5844 if (SrcVT.getScalarType() != MVT::i1) {
5845 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5846 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5847 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5848 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5849 DAG.getUNDEF(SrcVT), Mask, EVL});
5850 }
5851
5852 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5853 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5854    // In this case, we can interpret poison as -1, so there is nothing further to do.
5855 return Res;
5856
5857 // Convert -1 to VL.
5858 SDValue SetCC =
5859 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5860 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5861 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5862}
5863
5864// While RVV has alignment restrictions, we should always be able to load as a
5865// legal equivalently-sized byte-typed vector instead. This method is
5866// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
5867// the load is already correctly-aligned, it returns SDValue().
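// For example, an under-aligned load of nxv2i32 is instead emitted as a load
// of nxv8i8 at the original alignment and the result is bitcast back to
// nxv2i32.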
5868SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5869 SelectionDAG &DAG) const {
5870 auto *Load = cast<LoadSDNode>(Op);
5871 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5872
5873  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5874                                     Load->getMemoryVT(),
5875 *Load->getMemOperand()))
5876 return SDValue();
5877
5878 SDLoc DL(Op);
5879 MVT VT = Op.getSimpleValueType();
5880 unsigned EltSizeBits = VT.getScalarSizeInBits();
5881 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5882 "Unexpected unaligned RVV load type");
5883 MVT NewVT =
5884 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5885 assert(NewVT.isValid() &&
5886 "Expecting equally-sized RVV vector types to be legal");
5887 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5888 Load->getPointerInfo(), Load->getOriginalAlign(),
5889 Load->getMemOperand()->getFlags());
5890 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5891}
5892
5893// While RVV has alignment restrictions, we should always be able to store as a
5894// legal equivalently-sized byte-typed vector instead. This method is
5895// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
5896// returns SDValue() if the store is already correctly aligned.
5897SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5898 SelectionDAG &DAG) const {
5899 auto *Store = cast<StoreSDNode>(Op);
5900 assert(Store && Store->getValue().getValueType().isVector() &&
5901 "Expected vector store");
5902
5903  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5904                                     Store->getMemoryVT(),
5905 *Store->getMemOperand()))
5906 return SDValue();
5907
5908 SDLoc DL(Op);
5909 SDValue StoredVal = Store->getValue();
5910 MVT VT = StoredVal.getSimpleValueType();
5911 unsigned EltSizeBits = VT.getScalarSizeInBits();
5912 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5913 "Unexpected unaligned RVV store type");
5914 MVT NewVT =
5915 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5916 assert(NewVT.isValid() &&
5917 "Expecting equally-sized RVV vector types to be legal");
5918 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5919 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5920 Store->getPointerInfo(), Store->getOriginalAlign(),
5921 Store->getMemOperand()->getFlags());
5922}
5923
5924static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5925                             const RISCVSubtarget &Subtarget) {
5926 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5927
5928 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5929
5930 // All simm32 constants should be handled by isel.
5931 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5932 // this check redundant, but small immediates are common so this check
5933 // should have better compile time.
5934 if (isInt<32>(Imm))
5935 return Op;
5936
5937 // We only need to cost the immediate, if constant pool lowering is enabled.
5938 if (!Subtarget.useConstantPoolForLargeInts())
5939 return Op;
5940
5941  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
5942 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
5943 return Op;
5944
5945 // Optimizations below are disabled for opt size. If we're optimizing for
5946 // size, use a constant pool.
5947 if (DAG.shouldOptForSize())
5948 return SDValue();
5949
5950  // Special case. See if we can build the constant as (ADD (SLLI X, C), X); do
5951  // that if it will avoid a constant pool.
5952 // It will require an extra temporary register though.
5953 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
5954 // low and high 32 bits are the same and bit 31 and 63 are set.
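  // Illustrative example: 0x1234567812345678 can be built by materializing
  // 0x12345678 once and adding it to a copy of itself shifted left by 32,
  // avoiding a constant pool load at the cost of an extra temporary register.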
5955 unsigned ShiftAmt, AddOpc;
5956 RISCVMatInt::InstSeq SeqLo =
5957 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
5958 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
5959 return Op;
5960
5961 return SDValue();
5962}
5963
5964SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
5965 SelectionDAG &DAG) const {
5966 MVT VT = Op.getSimpleValueType();
5967 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
5968
5969 // Can this constant be selected by a Zfa FLI instruction?
5970 bool Negate = false;
5971 int Index = getLegalZfaFPImm(Imm, VT);
5972
5973 // If the constant is negative, try negating.
5974 if (Index < 0 && Imm.isNegative()) {
5975 Index = getLegalZfaFPImm(-Imm, VT);
5976 Negate = true;
5977 }
5978
5979 // If we couldn't find a FLI lowering, fall back to generic code.
5980 if (Index < 0)
5981 return SDValue();
5982
5983 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
5984 SDLoc DL(Op);
5985 SDValue Const =
5986 DAG.getNode(RISCVISD::FLI, DL, VT,
5987 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
5988 if (!Negate)
5989 return Const;
5990
5991 return DAG.getNode(ISD::FNEG, DL, VT, Const);
5992}
5993
5994static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
5995                                 const RISCVSubtarget &Subtarget) {
5996 SDLoc dl(Op);
5997 AtomicOrdering FenceOrdering =
5998 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5999 SyncScope::ID FenceSSID =
6000 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6001
6002 if (Subtarget.hasStdExtZtso()) {
6003 // The only fence that needs an instruction is a sequentially-consistent
6004 // cross-thread fence.
6005 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6006 FenceSSID == SyncScope::System)
6007 return Op;
6008
6009 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6010 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6011 }
6012
6013 // singlethread fences only synchronize with signal handlers on the same
6014 // thread and thus only need to preserve instruction order, not actually
6015 // enforce memory ordering.
6016 if (FenceSSID == SyncScope::SingleThread)
6017 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6018 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6019
6020 return Op;
6021}
6022
6023SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6024 SelectionDAG &DAG) const {
6025 SDLoc DL(Op);
6026 MVT VT = Op.getSimpleValueType();
6027 MVT XLenVT = Subtarget.getXLenVT();
6028 unsigned Check = Op.getConstantOperandVal(1);
6029 unsigned TDCMask = 0;
6030 if (Check & fcSNan)
6031 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6032 if (Check & fcQNan)
6033 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6034  if (Check & fcPosInf)
6035    TDCMask |= RISCV::FPMASK_Positive_Infinity;
6036  if (Check & fcNegInf)
6037    TDCMask |= RISCV::FPMASK_Negative_Infinity;
6038  if (Check & fcPosNormal)
6039    TDCMask |= RISCV::FPMASK_Positive_Normal;
6040  if (Check & fcNegNormal)
6041    TDCMask |= RISCV::FPMASK_Negative_Normal;
6042  if (Check & fcPosSubnormal)
6043    TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6044  if (Check & fcNegSubnormal)
6045    TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6046  if (Check & fcPosZero)
6047 TDCMask |= RISCV::FPMASK_Positive_Zero;
6048 if (Check & fcNegZero)
6049 TDCMask |= RISCV::FPMASK_Negative_Zero;
6050
6051 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6052
6053 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6054
6055 if (VT.isVector()) {
6056 SDValue Op0 = Op.getOperand(0);
6057 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6058
6059 if (VT.isScalableVector()) {
6060      MVT DstVT = VT0.changeVectorElementTypeToInteger();
6061      auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6062 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6063 Mask = Op.getOperand(2);
6064 VL = Op.getOperand(3);
6065 }
6066 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6067 VL, Op->getFlags());
6068 if (IsOneBitMask)
6069 return DAG.getSetCC(DL, VT, FPCLASS,
6070 DAG.getConstant(TDCMask, DL, DstVT),
6071                            ISD::SETEQ);
6072      SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6073 DAG.getConstant(TDCMask, DL, DstVT));
6074 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6075 ISD::SETNE);
6076 }
6077
6078 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6079 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6080 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6081 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6082 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6083 Mask = Op.getOperand(2);
6084 MVT MaskContainerVT =
6085 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6086 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6087 VL = Op.getOperand(3);
6088 }
6089 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6090
6091 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6092 Mask, VL, Op->getFlags());
6093
6094 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6095 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6096 if (IsOneBitMask) {
6097 SDValue VMSEQ =
6098 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6099 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6100 DAG.getUNDEF(ContainerVT), Mask, VL});
6101 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6102 }
6103 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6104 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6105
6106 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6107 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6108 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6109
6110 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6111 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6112 DAG.getUNDEF(ContainerVT), Mask, VL});
6113 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6114 }
6115
6116 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6117 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6118 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6119                             ISD::SETNE);
6120  return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6121}
6122
6123// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6124// operations propagate nans.
6125static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6126                                      const RISCVSubtarget &Subtarget) {
6127 SDLoc DL(Op);
6128 MVT VT = Op.getSimpleValueType();
6129
6130 SDValue X = Op.getOperand(0);
6131 SDValue Y = Op.getOperand(1);
6132
6133 if (!VT.isVector()) {
6134 MVT XLenVT = Subtarget.getXLenVT();
6135
6136 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6137 // ensures that when one input is a nan, the other will also be a nan
6138 // allowing the nan to propagate. If both inputs are nan, this will swap the
6139 // inputs which is harmless.
6140
6141 SDValue NewY = Y;
6142 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6143 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6144 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6145 }
6146
6147 SDValue NewX = X;
6148 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6149 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6150 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6151 }
6152
6153 unsigned Opc =
6154 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6155 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6156 }
6157
6158  // Check for known NaN-free inputs before converting fixed-length vectors to scalable.
6159 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6160 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6161
6162 MVT ContainerVT = VT;
6163 if (VT.isFixedLengthVector()) {
6164 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6165 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6166 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6167 }
6168
6169 SDValue Mask, VL;
6170 if (Op->isVPOpcode()) {
6171 Mask = Op.getOperand(2);
6172 if (VT.isFixedLengthVector())
6173 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6174 Subtarget);
6175 VL = Op.getOperand(3);
6176 } else {
6177 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6178 }
6179
6180 SDValue NewY = Y;
6181 if (!XIsNeverNan) {
6182 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6183 {X, X, DAG.getCondCode(ISD::SETOEQ),
6184 DAG.getUNDEF(ContainerVT), Mask, VL});
6185 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6186 DAG.getUNDEF(ContainerVT), VL);
6187 }
6188
6189 SDValue NewX = X;
6190 if (!YIsNeverNan) {
6191 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6192 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6193 DAG.getUNDEF(ContainerVT), Mask, VL});
6194 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6195 DAG.getUNDEF(ContainerVT), VL);
6196 }
6197
6198 unsigned Opc =
6199 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6200          ? RISCVISD::VFMAX_VL
6201          : RISCVISD::VFMIN_VL;
6202  SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6203 DAG.getUNDEF(ContainerVT), Mask, VL);
6204 if (VT.isFixedLengthVector())
6205 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6206 return Res;
6207}
6208
6209static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6210                               const RISCVSubtarget &Subtarget) {
6211 bool IsFABS = Op.getOpcode() == ISD::FABS;
6212 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6213 "Wrong opcode for lowering FABS or FNEG.");
6214
6215 MVT XLenVT = Subtarget.getXLenVT();
6216 MVT VT = Op.getSimpleValueType();
6217 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6218
6219 SDLoc DL(Op);
6220 SDValue Fmv =
6221 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6222
6223 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6224 Mask = Mask.sext(Subtarget.getXLen());
6225
6226 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6227 SDValue Logic =
6228 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6229 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6230}
6231
6232static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
6233                              const RISCVSubtarget &Subtarget) {
6234 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6235
6236 MVT XLenVT = Subtarget.getXLenVT();
6237 MVT VT = Op.getSimpleValueType();
6238 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6239
6240 SDValue Mag = Op.getOperand(0);
6241 SDValue Sign = Op.getOperand(1);
6242
6243 SDLoc DL(Op);
6244
6245 // Get sign bit into an integer value.
6246 SDValue SignAsInt;
6247 unsigned SignSize = Sign.getValueSizeInBits();
6248 if (SignSize == Subtarget.getXLen()) {
6249 SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
6250 } else if (SignSize == 16) {
6251 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
6252 } else if (SignSize == 32) {
6253 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
6254 } else if (SignSize == 64) {
6255 assert(XLenVT == MVT::i32 && "Unexpected type");
6256 // Copy the upper word to integer.
6257 SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
6258 .getValue(1);
6259 SignSize = 32;
6260 } else
6261 llvm_unreachable("Unexpected sign size");
6262
6263 // Get the signbit at the right position for MagAsInt.
6264 int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
6265 if (ShiftAmount > 0) {
6266 SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
6267 DAG.getConstant(ShiftAmount, DL, XLenVT));
6268 } else if (ShiftAmount < 0) {
6269 SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
6270 DAG.getConstant(-ShiftAmount, DL, XLenVT));
6271 }
6272
6273 // Mask the sign bit and any bits above it. The extra bits will be dropped
6274 // when we convert back to FP.
6275 SDValue SignMask = DAG.getConstant(
6276 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6277 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6278
6279 // Transform Mag value to integer, and clear the sign bit.
6280 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6281 SDValue ClearSignMask = DAG.getConstant(
6282 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6283 SDValue ClearedSign =
6284 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6285
6286 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
6287                                   SDNodeFlags::Disjoint);
6288
6289 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6290}
6291
6292/// Get a RISC-V target specified VL op for a given SDNode.
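/// For example, both ISD::ADD and ISD::VP_ADD map to RISCVISD::ADD_VL, while
/// the logic ops on i1 element vectors map to the mask-register forms
/// (VMAND_VL, VMOR_VL, VMXOR_VL) instead.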
6293static unsigned getRISCVVLOp(SDValue Op) {
6294#define OP_CASE(NODE) \
6295 case ISD::NODE: \
6296 return RISCVISD::NODE##_VL;
6297#define VP_CASE(NODE) \
6298 case ISD::VP_##NODE: \
6299 return RISCVISD::NODE##_VL;
6300 // clang-format off
6301 switch (Op.getOpcode()) {
6302 default:
6303 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
6304 OP_CASE(ADD)
6305 OP_CASE(SUB)
6306 OP_CASE(MUL)
6307 OP_CASE(MULHS)
6308 OP_CASE(MULHU)
6309 OP_CASE(SDIV)
6310 OP_CASE(SREM)
6311 OP_CASE(UDIV)
6312 OP_CASE(UREM)
6313 OP_CASE(SHL)
6314 OP_CASE(SRA)
6315 OP_CASE(SRL)
6316 OP_CASE(ROTL)
6317 OP_CASE(ROTR)
6318 OP_CASE(BSWAP)
6319 OP_CASE(CTTZ)
6320 OP_CASE(CTLZ)
6321 OP_CASE(CTPOP)
6322 OP_CASE(BITREVERSE)
6323 OP_CASE(SADDSAT)
6324 OP_CASE(UADDSAT)
6325 OP_CASE(SSUBSAT)
6326 OP_CASE(USUBSAT)
6327 OP_CASE(AVGFLOORS)
6328 OP_CASE(AVGFLOORU)
6329 OP_CASE(AVGCEILS)
6330 OP_CASE(AVGCEILU)
6331 OP_CASE(FADD)
6332 OP_CASE(FSUB)
6333 OP_CASE(FMUL)
6334 OP_CASE(FDIV)
6335 OP_CASE(FNEG)
6336 OP_CASE(FABS)
6337 OP_CASE(FSQRT)
6338 OP_CASE(SMIN)
6339 OP_CASE(SMAX)
6340 OP_CASE(UMIN)
6341 OP_CASE(UMAX)
6342 OP_CASE(STRICT_FADD)
6343 OP_CASE(STRICT_FSUB)
6344 OP_CASE(STRICT_FMUL)
6345 OP_CASE(STRICT_FDIV)
6346 OP_CASE(STRICT_FSQRT)
6347 VP_CASE(ADD) // VP_ADD
6348 VP_CASE(SUB) // VP_SUB
6349 VP_CASE(MUL) // VP_MUL
6350 VP_CASE(SDIV) // VP_SDIV
6351 VP_CASE(SREM) // VP_SREM
6352 VP_CASE(UDIV) // VP_UDIV
6353 VP_CASE(UREM) // VP_UREM
6354 VP_CASE(SHL) // VP_SHL
6355 VP_CASE(FADD) // VP_FADD
6356 VP_CASE(FSUB) // VP_FSUB
6357 VP_CASE(FMUL) // VP_FMUL
6358 VP_CASE(FDIV) // VP_FDIV
6359 VP_CASE(FNEG) // VP_FNEG
6360 VP_CASE(FABS) // VP_FABS
6361 VP_CASE(SMIN) // VP_SMIN
6362 VP_CASE(SMAX) // VP_SMAX
6363 VP_CASE(UMIN) // VP_UMIN
6364 VP_CASE(UMAX) // VP_UMAX
6365 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
6366 VP_CASE(SETCC) // VP_SETCC
6367 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6368 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6369 VP_CASE(BITREVERSE) // VP_BITREVERSE
6370 VP_CASE(SADDSAT) // VP_SADDSAT
6371 VP_CASE(UADDSAT) // VP_UADDSAT
6372 VP_CASE(SSUBSAT) // VP_SSUBSAT
6373 VP_CASE(USUBSAT) // VP_USUBSAT
6374 VP_CASE(BSWAP) // VP_BSWAP
6375 VP_CASE(CTLZ) // VP_CTLZ
6376 VP_CASE(CTTZ) // VP_CTTZ
6377 VP_CASE(CTPOP) // VP_CTPOP
6378  case ISD::CTLZ_ZERO_UNDEF:
6379  case ISD::VP_CTLZ_ZERO_UNDEF:
6380 return RISCVISD::CTLZ_VL;
6381  case ISD::CTTZ_ZERO_UNDEF:
6382  case ISD::VP_CTTZ_ZERO_UNDEF:
6383 return RISCVISD::CTTZ_VL;
6384 case ISD::FMA:
6385 case ISD::VP_FMA:
6386 return RISCVISD::VFMADD_VL;
6387 case ISD::STRICT_FMA:
6388    return RISCVISD::STRICT_VFMADD_VL;
6389  case ISD::AND:
6390 case ISD::VP_AND:
6391 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6392 return RISCVISD::VMAND_VL;
6393 return RISCVISD::AND_VL;
6394 case ISD::OR:
6395 case ISD::VP_OR:
6396 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6397 return RISCVISD::VMOR_VL;
6398 return RISCVISD::OR_VL;
6399 case ISD::XOR:
6400 case ISD::VP_XOR:
6401 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6402 return RISCVISD::VMXOR_VL;
6403 return RISCVISD::XOR_VL;
6404 case ISD::VP_SELECT:
6405 case ISD::VP_MERGE:
6406 return RISCVISD::VMERGE_VL;
6407 case ISD::VP_SRA:
6408 return RISCVISD::SRA_VL;
6409 case ISD::VP_SRL:
6410 return RISCVISD::SRL_VL;
6411 case ISD::VP_SQRT:
6412 return RISCVISD::FSQRT_VL;
6413 case ISD::VP_SIGN_EXTEND:
6414 return RISCVISD::VSEXT_VL;
6415 case ISD::VP_ZERO_EXTEND:
6416 return RISCVISD::VZEXT_VL;
6417 case ISD::VP_FP_TO_SINT:
6418    return RISCVISD::VFCVT_RTZ_X_F_VL;
6419  case ISD::VP_FP_TO_UINT:
6420    return RISCVISD::VFCVT_RTZ_XU_F_VL;
6421  case ISD::FMINNUM:
6422 case ISD::VP_FMINNUM:
6423 return RISCVISD::VFMIN_VL;
6424 case ISD::FMAXNUM:
6425 case ISD::VP_FMAXNUM:
6426 return RISCVISD::VFMAX_VL;
6427 case ISD::LRINT:
6428 case ISD::VP_LRINT:
6429 case ISD::LLRINT:
6430 case ISD::VP_LLRINT:
6431    return RISCVISD::VFCVT_X_F_VL;
6432  }
6433 // clang-format on
6434#undef OP_CASE
6435#undef VP_CASE
6436}
6437
6438/// Return true if a RISC-V target specified op has a passthru operand.
6439static bool hasPassthruOp(unsigned Opcode) {
6440 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6442 "not a RISC-V target specific op");
6443 static_assert(
6446 "adding target specific op should update this function");
6447 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6448 return true;
6449 if (Opcode == RISCVISD::FCOPYSIGN_VL)
6450 return true;
6451 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6452 return true;
6453 if (Opcode == RISCVISD::SETCC_VL)
6454 return true;
6455 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6456 return true;
6457 if (Opcode == RISCVISD::VMERGE_VL)
6458 return true;
6459 return false;
6460}
6461
6462/// Return true if a RISC-V target specified op has a mask operand.
6463static bool hasMaskOp(unsigned Opcode) {
6464 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6466 "not a RISC-V target specific op");
6467 static_assert(
6470 "adding target specific op should update this function");
6471 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6472 return true;
6473 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6474 return true;
6475 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6477 return true;
6478 return false;
6479}
6480
6481static bool isPromotedOpNeedingSplit(SDValue Op,
6482                                     const RISCVSubtarget &Subtarget) {
6483 if (Op.getValueType() == MVT::nxv32f16 &&
6484 (Subtarget.hasVInstructionsF16Minimal() &&
6485 !Subtarget.hasVInstructionsF16()))
6486 return true;
6487 if (Op.getValueType() == MVT::nxv32bf16)
6488 return true;
6489 return false;
6490}
6491
6492static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
6493  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6494 SDLoc DL(Op);
6495
6496  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6497  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6498
6499 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6500 if (!Op.getOperand(j).getValueType().isVector()) {
6501 LoOperands[j] = Op.getOperand(j);
6502 HiOperands[j] = Op.getOperand(j);
6503 continue;
6504 }
6505 std::tie(LoOperands[j], HiOperands[j]) =
6506 DAG.SplitVector(Op.getOperand(j), DL);
6507 }
6508
6509 SDValue LoRes =
6510 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6511 SDValue HiRes =
6512 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6513
6514 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6515}
6516
6517static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
6518  assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6519 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6520 SDLoc DL(Op);
6521
6522  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6523  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6524
6525 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6526 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6527 std::tie(LoOperands[j], HiOperands[j]) =
6528 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6529 continue;
6530 }
6531 if (!Op.getOperand(j).getValueType().isVector()) {
6532 LoOperands[j] = Op.getOperand(j);
6533 HiOperands[j] = Op.getOperand(j);
6534 continue;
6535 }
6536 std::tie(LoOperands[j], HiOperands[j]) =
6537 DAG.SplitVector(Op.getOperand(j), DL);
6538 }
6539
6540 SDValue LoRes =
6541 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6542 SDValue HiRes =
6543 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6544
6545 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6546}
6547
6548static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
6549  SDLoc DL(Op);
6550
6551 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6552 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6553 auto [EVLLo, EVLHi] =
6554 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6555
6556 SDValue ResLo =
6557 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6558 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6559 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6560 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6561}
6562
6563static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
6564
6565 assert(Op->isStrictFPOpcode());
6566
6567 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6568
6569 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6570 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6571
6572 SDLoc DL(Op);
6573
6574  SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6575  SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6576
6577 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6578 if (!Op.getOperand(j).getValueType().isVector()) {
6579 LoOperands[j] = Op.getOperand(j);
6580 HiOperands[j] = Op.getOperand(j);
6581 continue;
6582 }
6583 std::tie(LoOperands[j], HiOperands[j]) =
6584 DAG.SplitVector(Op.getOperand(j), DL);
6585 }
6586
6587 SDValue LoRes =
6588 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6589 HiOperands[0] = LoRes.getValue(1);
6590 SDValue HiRes =
6591 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6592
6593 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6594 LoRes.getValue(0), HiRes.getValue(0));
6595 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6596}
6597
6598SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6599                                            SelectionDAG &DAG) const {
6600 switch (Op.getOpcode()) {
6601 default:
6602 report_fatal_error("unimplemented operand");
6603 case ISD::ATOMIC_FENCE:
6604 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6605 case ISD::GlobalAddress:
6606 return lowerGlobalAddress(Op, DAG);
6607 case ISD::BlockAddress:
6608 return lowerBlockAddress(Op, DAG);
6609 case ISD::ConstantPool:
6610 return lowerConstantPool(Op, DAG);
6611 case ISD::JumpTable:
6612 return lowerJumpTable(Op, DAG);
6613  case ISD::GlobalTLSAddress:
6614    return lowerGlobalTLSAddress(Op, DAG);
6615 case ISD::Constant:
6616 return lowerConstant(Op, DAG, Subtarget);
6617 case ISD::ConstantFP:
6618 return lowerConstantFP(Op, DAG);
6619 case ISD::SELECT:
6620 return lowerSELECT(Op, DAG);
6621 case ISD::BRCOND:
6622 return lowerBRCOND(Op, DAG);
6623 case ISD::VASTART:
6624 return lowerVASTART(Op, DAG);
6625 case ISD::FRAMEADDR:
6626 return lowerFRAMEADDR(Op, DAG);
6627 case ISD::RETURNADDR:
6628 return lowerRETURNADDR(Op, DAG);
6629 case ISD::SHL_PARTS:
6630 return lowerShiftLeftParts(Op, DAG);
6631 case ISD::SRA_PARTS:
6632 return lowerShiftRightParts(Op, DAG, true);
6633 case ISD::SRL_PARTS:
6634 return lowerShiftRightParts(Op, DAG, false);
6635 case ISD::ROTL:
6636 case ISD::ROTR:
6637 if (Op.getValueType().isFixedLengthVector()) {
6638 assert(Subtarget.hasStdExtZvkb());
6639 return lowerToScalableOp(Op, DAG);
6640 }
6641 assert(Subtarget.hasVendorXTHeadBb() &&
6642 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6643 "Unexpected custom legalization");
6644 // XTHeadBb only supports rotate by constant.
6645 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6646 return SDValue();
6647 return Op;
6648 case ISD::BITCAST: {
6649 SDLoc DL(Op);
6650 EVT VT = Op.getValueType();
6651 SDValue Op0 = Op.getOperand(0);
6652 EVT Op0VT = Op0.getValueType();
6653 MVT XLenVT = Subtarget.getXLenVT();
6654 if (Op0VT == MVT::i16 &&
6655 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6656 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6657 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6658 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
6659 }
6660 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6661 Subtarget.hasStdExtFOrZfinx()) {
6662 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6663 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6664 }
6665 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
6666 Subtarget.hasStdExtDOrZdinx()) {
6667 SDValue Lo, Hi;
6668 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6669 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6670 }
6671
6672 // Consider other scalar<->scalar casts as legal if the types are legal.
6673 // Otherwise expand them.
6674 if (!VT.isVector() && !Op0VT.isVector()) {
6675 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6676 return Op;
6677 return SDValue();
6678 }
6679
6680 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6681 "Unexpected types");
6682
6683 if (VT.isFixedLengthVector()) {
6684 // We can handle fixed length vector bitcasts with a simple replacement
6685 // in isel.
6686 if (Op0VT.isFixedLengthVector())
6687 return Op;
6688 // When bitcasting from scalar to fixed-length vector, insert the scalar
6689 // into a one-element vector of the result type, and perform a vector
6690 // bitcast.
6691 if (!Op0VT.isVector()) {
6692 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6693 if (!isTypeLegal(BVT))
6694 return SDValue();
6695 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6696 DAG.getUNDEF(BVT), Op0,
6697 DAG.getVectorIdxConstant(0, DL)));
6698 }
6699 return SDValue();
6700 }
6701 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6702 // thus: bitcast the vector to a one-element vector type whose element type
6703 // is the same as the result type, and extract the first element.
6704 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6705 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6706 if (!isTypeLegal(BVT))
6707 return SDValue();
6708 SDValue BVec = DAG.getBitcast(BVT, Op0);
6709 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6710 DAG.getVectorIdxConstant(0, DL));
6711 }
6712 return SDValue();
6713 }
6714 case ISD::INTRINSIC_WO_CHAIN:
6715 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6716 case ISD::INTRINSIC_W_CHAIN:
6717 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6718 case ISD::INTRINSIC_VOID:
6719 return LowerINTRINSIC_VOID(Op, DAG);
6720 case ISD::IS_FPCLASS:
6721 return LowerIS_FPCLASS(Op, DAG);
6722 case ISD::BITREVERSE: {
6723 MVT VT = Op.getSimpleValueType();
6724 if (VT.isFixedLengthVector()) {
6725 assert(Subtarget.hasStdExtZvbb());
6726 return lowerToScalableOp(Op, DAG);
6727 }
6728 SDLoc DL(Op);
6729 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6730 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6731 // Expand bitreverse to a bswap(rev8) followed by brev8.
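// (rev8 reverses the byte order of the value and brev8 reverses the bits
// within each byte, so composing the two reverses every bit.)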
6732 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6733 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6734 }
6735 case ISD::TRUNCATE:
6736 case ISD::TRUNCATE_SSAT_S:
6737 case ISD::TRUNCATE_USAT_U:
6738 // Only custom-lower vector truncates
6739 if (!Op.getSimpleValueType().isVector())
6740 return Op;
6741 return lowerVectorTruncLike(Op, DAG);
6742 case ISD::ANY_EXTEND:
6743 case ISD::ZERO_EXTEND:
6744 if (Op.getOperand(0).getValueType().isVector() &&
6745 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6746 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6747 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6748 case ISD::SIGN_EXTEND:
6749 if (Op.getOperand(0).getValueType().isVector() &&
6750 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6751 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6752 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6753 case ISD::SPLAT_VECTOR_PARTS:
6754 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6755 case ISD::INSERT_VECTOR_ELT:
6756 return lowerINSERT_VECTOR_ELT(Op, DAG);
6757 case ISD::EXTRACT_VECTOR_ELT:
6758 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6759 case ISD::SCALAR_TO_VECTOR: {
6760 MVT VT = Op.getSimpleValueType();
6761 SDLoc DL(Op);
6762 SDValue Scalar = Op.getOperand(0);
6763 if (VT.getVectorElementType() == MVT::i1) {
6764 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6765 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6766 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6767 }
6768 MVT ContainerVT = VT;
6769 if (VT.isFixedLengthVector())
6770 ContainerVT = getContainerForFixedLengthVector(VT);
6771 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6772
6773 SDValue V;
6774 if (VT.isFloatingPoint()) {
6775 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
6776 DAG.getUNDEF(ContainerVT), Scalar, VL);
6777 } else {
6778 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6779 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6780 DAG.getUNDEF(ContainerVT), Scalar, VL);
6781 }
6782 if (VT.isFixedLengthVector())
6783 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6784 return V;
6785 }
6786 case ISD::VSCALE: {
6787 MVT XLenVT = Subtarget.getXLenVT();
6788 MVT VT = Op.getSimpleValueType();
6789 SDLoc DL(Op);
6790 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6791 // We define our scalable vector types for lmul=1 to use a 64 bit known
6792 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6793 // vscale as VLENB / 8.
6794 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6795 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6796 report_fatal_error("Support for VLEN==32 is incomplete.");
6797 // We assume VLENB is a multiple of 8. We manually choose the best shift
6798 // here because SimplifyDemandedBits isn't always able to simplify it.
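// Worked examples (vscale = VLENB / 8): a multiplier of 4 becomes VLENB >> 1,
// a multiplier of 16 becomes VLENB << 1, and a multiplier of 24 is emitted as
// VLENB * 3 rather than a shift followed by a multiply.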
6799 uint64_t Val = Op.getConstantOperandVal(0);
6800 if (isPowerOf2_64(Val)) {
6801 uint64_t Log2 = Log2_64(Val);
6802 if (Log2 < 3)
6803 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6804 DAG.getConstant(3 - Log2, DL, VT));
6805 else if (Log2 > 3)
6806 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6807 DAG.getConstant(Log2 - 3, DL, XLenVT));
6808 } else if ((Val % 8) == 0) {
6809 // If the multiplier is a multiple of 8, scale it down to avoid needing
6810 // to shift the VLENB value.
6811 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6812 DAG.getConstant(Val / 8, DL, XLenVT));
6813 } else {
6814 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6815 DAG.getConstant(3, DL, XLenVT));
6816 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6817 DAG.getConstant(Val, DL, XLenVT));
6818 }
6819 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6820 }
6821 case ISD::FPOWI: {
6822 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6823 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6824 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6825 Op.getOperand(1).getValueType() == MVT::i32) {
6826 SDLoc DL(Op);
6827 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6828 SDValue Powi =
6829 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6830 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6831 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6832 }
6833 return SDValue();
6834 }
6835 case ISD::FMAXIMUM:
6836 case ISD::FMINIMUM:
6837 if (isPromotedOpNeedingSplit(Op, Subtarget))
6838 return SplitVectorOp(Op, DAG);
6839 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6840 case ISD::FP_EXTEND:
6841 case ISD::FP_ROUND:
6842 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6843 case ISD::STRICT_FP_EXTEND:
6844 case ISD::STRICT_FP_ROUND:
6845 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6846 case ISD::SINT_TO_FP:
6847 case ISD::UINT_TO_FP:
6848 if (Op.getValueType().isVector() &&
6849 ((Op.getValueType().getScalarType() == MVT::f16 &&
6850 (Subtarget.hasVInstructionsF16Minimal() &&
6851 !Subtarget.hasVInstructionsF16())) ||
6852 Op.getValueType().getScalarType() == MVT::bf16)) {
6853 if (isPromotedOpNeedingSplit(Op, Subtarget))
6854 return SplitVectorOp(Op, DAG);
6855 // int -> f32
6856 SDLoc DL(Op);
6857 MVT NVT =
6858 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6859 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6860 // f32 -> [b]f16
6861 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6862 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6863 }
6864 [[fallthrough]];
6865 case ISD::FP_TO_SINT:
6866 case ISD::FP_TO_UINT:
6867 if (SDValue Op1 = Op.getOperand(0);
6868 Op1.getValueType().isVector() &&
6869 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6870 (Subtarget.hasVInstructionsF16Minimal() &&
6871 !Subtarget.hasVInstructionsF16())) ||
6872 Op1.getValueType().getScalarType() == MVT::bf16)) {
6873 if (isPromotedOpNeedingSplit(Op1, Subtarget))
6874 return SplitVectorOp(Op, DAG);
6875 // [b]f16 -> f32
6876 SDLoc DL(Op);
6877 MVT NVT = MVT::getVectorVT(MVT::f32,
6878 Op1.getValueType().getVectorElementCount());
6879 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6880 // f32 -> int
6881 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6882 }
6883 [[fallthrough]];
6884 case ISD::STRICT_FP_TO_SINT:
6885 case ISD::STRICT_FP_TO_UINT:
6886 case ISD::STRICT_SINT_TO_FP:
6887 case ISD::STRICT_UINT_TO_FP: {
6888 // RVV can only do fp<->int conversions to types half/double the size as
6889 // the source. We custom-lower any conversions that do two hops into
6890 // sequences.
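// For example, a <vscale x N x i8> -> <vscale x N x f64> conversion is emitted
// as an i8 -> i32 extend followed by a single i32 -> f64 convert, and the
// opposite direction narrows in two analogous steps.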
6891 MVT VT = Op.getSimpleValueType();
6892 if (VT.isScalarInteger())
6893 return lowerFP_TO_INT(Op, DAG, Subtarget);
6894 bool IsStrict = Op->isStrictFPOpcode();
6895 SDValue Src = Op.getOperand(0 + IsStrict);
6896 MVT SrcVT = Src.getSimpleValueType();
6897 if (SrcVT.isScalarInteger())
6898 return lowerINT_TO_FP(Op, DAG, Subtarget);
6899 if (!VT.isVector())
6900 return Op;
6901 SDLoc DL(Op);
6902 MVT EltVT = VT.getVectorElementType();
6903 MVT SrcEltVT = SrcVT.getVectorElementType();
6904 unsigned EltSize = EltVT.getSizeInBits();
6905 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6906 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6907 "Unexpected vector element types");
6908
6909 bool IsInt2FP = SrcEltVT.isInteger();
6910 // Widening conversions
6911 if (EltSize > (2 * SrcEltSize)) {
6912 if (IsInt2FP) {
6913 // Do a regular integer sign/zero extension then convert to float.
6914 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6915 VT.getVectorElementCount());
6916 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6917 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6918 ? ISD::ZERO_EXTEND
6919 : ISD::SIGN_EXTEND;
6920 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6921 if (IsStrict)
6922 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6923 Op.getOperand(0), Ext);
6924 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6925 }
6926 // FP2Int
6927 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6928 // Do one doubling fp_extend then complete the operation by converting
6929 // to int.
6930 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6931 if (IsStrict) {
6932 auto [FExt, Chain] =
6933 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6934 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6935 }
6936 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6937 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6938 }
6939
6940 // Narrowing conversions
6941 if (SrcEltSize > (2 * EltSize)) {
6942 if (IsInt2FP) {
6943 // One narrowing int_to_fp, then an fp_round.
6944 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
6945 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6946 if (IsStrict) {
6947 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
6948 DAG.getVTList(InterimFVT, MVT::Other),
6949 Op.getOperand(0), Src);
6950 SDValue Chain = Int2FP.getValue(1);
6951 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
6952 }
6953 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
6954 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
6955 }
6956 // FP2Int
6957 // One narrowing fp_to_int, then truncate the integer. If the float isn't
6958 // representable by the integer, the result is poison.
6959 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
6960 VT.getVectorElementCount());
6961 if (IsStrict) {
6962 SDValue FP2Int =
6963 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
6964 Op.getOperand(0), Src);
6965 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6966 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
6967 }
6968 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
6969 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
6970 }
6971
6972 // Scalable vectors can exit here. Patterns will handle equally-sized
6973 // conversions halving/doubling ones.
6974 if (!VT.isFixedLengthVector())
6975 return Op;
6976
6977 // For fixed-length vectors we lower to a custom "VL" node.
6978 unsigned RVVOpc = 0;
6979 switch (Op.getOpcode()) {
6980 default:
6981 llvm_unreachable("Impossible opcode");
6982 case ISD::FP_TO_SINT:
6983 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
6984 break;
6985 case ISD::FP_TO_UINT:
6986 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
6987 break;
6988 case ISD::SINT_TO_FP:
6989 RVVOpc = RISCVISD::SINT_TO_FP_VL;
6990 break;
6991 case ISD::UINT_TO_FP:
6992 RVVOpc = RISCVISD::UINT_TO_FP_VL;
6993 break;
6994 case ISD::STRICT_FP_TO_SINT:
6995 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
6996 break;
6997 case ISD::STRICT_FP_TO_UINT:
6998 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
6999 break;
7000 case ISD::STRICT_SINT_TO_FP:
7001 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7002 break;
7003 case ISD::STRICT_UINT_TO_FP:
7004 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7005 break;
7006 }
7007
7008 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7009 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7010 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7011 "Expected same element count");
7012
7013 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7014
7015 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7016 if (IsStrict) {
7017 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7018 Op.getOperand(0), Src, Mask, VL);
7019 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7020 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7021 }
7022 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7023 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7024 }
7025 case ISD::FP_TO_SINT_SAT:
7026 case ISD::FP_TO_UINT_SAT:
7027 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7028 case ISD::FP_TO_BF16: {
7029 // Custom lower to ensure the libcall return is passed in an FPR on hard
7030 // float ABIs.
7031 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7032 SDLoc DL(Op);
7033 MakeLibCallOptions CallOptions;
7034 RTLIB::Libcall LC =
7035 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7036 SDValue Res =
7037 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7038 if (Subtarget.is64Bit())
7039 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7040 return DAG.getBitcast(MVT::i32, Res);
7041 }
7042 case ISD::BF16_TO_FP: {
7043 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7044 MVT VT = Op.getSimpleValueType();
7045 SDLoc DL(Op);
7046 Op = DAG.getNode(
7047 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7048 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7049 SDValue Res = Subtarget.is64Bit()
7050 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7051 : DAG.getBitcast(MVT::f32, Op);
7052 // fp_extend if the target VT is bigger than f32.
7053 if (VT != MVT::f32)
7054 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7055 return Res;
7056 }
7057 case ISD::STRICT_FP_TO_FP16:
7058 case ISD::FP_TO_FP16: {
7059 // Custom lower to ensure the libcall return is passed in an FPR on hard
7060 // float ABIs.
7061 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7062 SDLoc DL(Op);
7063 MakeLibCallOptions CallOptions;
7064 bool IsStrict = Op->isStrictFPOpcode();
7065 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7066 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7067 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7068 SDValue Res;
7069 std::tie(Res, Chain) =
7070 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7071 if (Subtarget.is64Bit())
7072 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7073 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7074 if (IsStrict)
7075 return DAG.getMergeValues({Result, Chain}, DL);
7076 return Result;
7077 }
7078 case ISD::STRICT_FP16_TO_FP:
7079 case ISD::FP16_TO_FP: {
7080 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7081 // float ABIs.
7082 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7083 SDLoc DL(Op);
7084 MakeLibCallOptions CallOptions;
7085 bool IsStrict = Op->isStrictFPOpcode();
7086 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7087 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7088 SDValue Arg = Subtarget.is64Bit()
7089 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7090 : DAG.getBitcast(MVT::f32, Op0);
7091 SDValue Res;
7092 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7093 CallOptions, DL, Chain);
7094 if (IsStrict)
7095 return DAG.getMergeValues({Res, Chain}, DL);
7096 return Res;
7097 }
7098 case ISD::FTRUNC:
7099 case ISD::FCEIL:
7100 case ISD::FFLOOR:
7101 case ISD::FNEARBYINT:
7102 case ISD::FRINT:
7103 case ISD::FROUND:
7104 case ISD::FROUNDEVEN:
7105 if (isPromotedOpNeedingSplit(Op, Subtarget))
7106 return SplitVectorOp(Op, DAG);
7107 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7108 case ISD::LRINT:
7109 case ISD::LLRINT:
7110 if (Op.getValueType().isVector())
7111 return lowerVectorXRINT(Op, DAG, Subtarget);
7112 [[fallthrough]];
7113 case ISD::LROUND:
7114 case ISD::LLROUND: {
7115 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7116 "Unexpected custom legalisation");
7117 SDLoc DL(Op);
7118 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7119 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7120 }
7121 case ISD::STRICT_LRINT:
7122 case ISD::STRICT_LLRINT:
7123 case ISD::STRICT_LROUND:
7124 case ISD::STRICT_LLROUND: {
7125 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7126 "Unexpected custom legalisation");
7127 SDLoc DL(Op);
7128 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7129 {Op.getOperand(0), Op.getOperand(1)});
7130 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7131 {Ext.getValue(1), Ext.getValue(0)});
7132 }
7133 case ISD::VECREDUCE_ADD:
7134 case ISD::VECREDUCE_UMAX:
7135 case ISD::VECREDUCE_SMAX:
7136 case ISD::VECREDUCE_UMIN:
7137 case ISD::VECREDUCE_SMIN:
7138 return lowerVECREDUCE(Op, DAG);
7139 case ISD::VECREDUCE_AND:
7140 case ISD::VECREDUCE_OR:
7141 case ISD::VECREDUCE_XOR:
7142 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7143 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7144 return lowerVECREDUCE(Op, DAG);
7145 case ISD::VECREDUCE_FADD:
7146 case ISD::VECREDUCE_SEQ_FADD:
7147 case ISD::VECREDUCE_FMIN:
7148 case ISD::VECREDUCE_FMAX:
7149 case ISD::VECREDUCE_FMAXIMUM:
7150 case ISD::VECREDUCE_FMINIMUM:
7151 return lowerFPVECREDUCE(Op, DAG);
7152 case ISD::VP_REDUCE_ADD:
7153 case ISD::VP_REDUCE_UMAX:
7154 case ISD::VP_REDUCE_SMAX:
7155 case ISD::VP_REDUCE_UMIN:
7156 case ISD::VP_REDUCE_SMIN:
7157 case ISD::VP_REDUCE_FADD:
7158 case ISD::VP_REDUCE_SEQ_FADD:
7159 case ISD::VP_REDUCE_FMIN:
7160 case ISD::VP_REDUCE_FMAX:
7161 case ISD::VP_REDUCE_FMINIMUM:
7162 case ISD::VP_REDUCE_FMAXIMUM:
7163 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7164 return SplitVectorReductionOp(Op, DAG);
7165 return lowerVPREDUCE(Op, DAG);
7166 case ISD::VP_REDUCE_AND:
7167 case ISD::VP_REDUCE_OR:
7168 case ISD::VP_REDUCE_XOR:
7169 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7170 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7171 return lowerVPREDUCE(Op, DAG);
7172 case ISD::VP_CTTZ_ELTS:
7173 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7174 return lowerVPCttzElements(Op, DAG);
7175 case ISD::UNDEF: {
7176 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7177 return convertFromScalableVector(Op.getSimpleValueType(),
7178 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7179 }
7180 case ISD::INSERT_SUBVECTOR:
7181 return lowerINSERT_SUBVECTOR(Op, DAG);
7182 case ISD::EXTRACT_SUBVECTOR:
7183 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7184 case ISD::VECTOR_DEINTERLEAVE:
7185 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7186 case ISD::VECTOR_INTERLEAVE:
7187 return lowerVECTOR_INTERLEAVE(Op, DAG);
7188 case ISD::STEP_VECTOR:
7189 return lowerSTEP_VECTOR(Op, DAG);
7190 case ISD::VECTOR_REVERSE:
7191 return lowerVECTOR_REVERSE(Op, DAG);
7192 case ISD::VECTOR_SPLICE:
7193 return lowerVECTOR_SPLICE(Op, DAG);
7194 case ISD::BUILD_VECTOR:
7195 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7196 case ISD::SPLAT_VECTOR: {
7197 MVT VT = Op.getSimpleValueType();
7198 MVT EltVT = VT.getVectorElementType();
7199 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7200 EltVT == MVT::bf16) {
7201 SDLoc DL(Op);
7202 SDValue Elt;
7203 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7204 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7205 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7206 Op.getOperand(0));
7207 else
7208 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7209 MVT IVT = VT.changeVectorElementType(MVT::i16);
7210 return DAG.getNode(ISD::BITCAST, DL, VT,
7211 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7212 }
7213
7214 if (EltVT == MVT::i1)
7215 return lowerVectorMaskSplat(Op, DAG);
7216 return SDValue();
7217 }
7218 case ISD::VECTOR_SHUFFLE:
7219 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7220 case ISD::CONCAT_VECTORS: {
7221 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7222 // better than going through the stack, as the default expansion does.
7223 SDLoc DL(Op);
7224 MVT VT = Op.getSimpleValueType();
7225 MVT ContainerVT = VT;
7226 if (VT.isFixedLengthVector())
7227 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7228
7229 // Recursively split concat_vectors with more than 2 operands:
7230 //
7231 // concat_vector op1, op2, op3, op4
7232 // ->
7233 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7234 //
7235 // This reduces the length of the chain of vslideups and allows us to
7236 // perform the vslideups at a smaller LMUL, limited to MF2.
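// For example, an 8-operand concat_vectors of a sufficiently large type is
// first split into two 4-operand halves, each of which is split again, giving
// a balanced tree of two-operand concatenations.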
7237 if (Op.getNumOperands() > 2 &&
7238 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7239 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7240 assert(isPowerOf2_32(Op.getNumOperands()));
7241 size_t HalfNumOps = Op.getNumOperands() / 2;
7242 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7243 Op->ops().take_front(HalfNumOps));
7244 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7245 Op->ops().drop_front(HalfNumOps));
7246 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7247 }
7248
7249 unsigned NumOpElts =
7250 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7251 SDValue Vec = DAG.getUNDEF(VT);
7252 for (const auto &OpIdx : enumerate(Op->ops())) {
7253 SDValue SubVec = OpIdx.value();
7254 // Don't insert undef subvectors.
7255 if (SubVec.isUndef())
7256 continue;
7257 Vec =
7258 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
7259 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
7260 }
7261 return Vec;
7262 }
7263 case ISD::LOAD: {
7264 auto *Load = cast<LoadSDNode>(Op);
7265 EVT VecTy = Load->getMemoryVT();
7266 // Handle normal vector tuple load.
7267 if (VecTy.isRISCVVectorTuple()) {
7268 SDLoc DL(Op);
7269 MVT XLenVT = Subtarget.getXLenVT();
7270 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7271 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7272 unsigned NumElts = Sz / (NF * 8);
7273 int Log2LMUL = Log2_64(NumElts) - 3;
7274
7275 auto Flag = SDNodeFlags();
7276 Flag.setNoUnsignedWrap(true);
7277 SDValue Ret = DAG.getUNDEF(VecTy);
7278 SDValue BasePtr = Load->getBasePtr();
7279 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7280 VROffset =
7281 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7282 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7283 SmallVector<SDValue, 8> OutChains;
7284
7285 // Load NF vector registers and combine them to a vector tuple.
7286 for (unsigned i = 0; i < NF; ++i) {
7287 SDValue LoadVal = DAG.getLoad(
7288 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
7289 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
7290 OutChains.push_back(LoadVal.getValue(1));
7291 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7292 DAG.getVectorIdxConstant(i, DL));
7293 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7294 }
7295 return DAG.getMergeValues(
7296 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
7297 }
7298
7299 if (auto V = expandUnalignedRVVLoad(Op, DAG))
7300 return V;
7301 if (Op.getValueType().isFixedLengthVector())
7302 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7303 return Op;
7304 }
7305 case ISD::STORE: {
7306 auto *Store = cast<StoreSDNode>(Op);
7307 SDValue StoredVal = Store->getValue();
7308 EVT VecTy = StoredVal.getValueType();
7309 // Handle normal vector tuple store.
7310 if (VecTy.isRISCVVectorTuple()) {
7311 SDLoc DL(Op);
7312 MVT XLenVT = Subtarget.getXLenVT();
7313 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7314 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7315 unsigned NumElts = Sz / (NF * 8);
7316 int Log2LMUL = Log2_64(NumElts) - 3;
7317
7318 auto Flag = SDNodeFlags();
7319 Flag.setNoUnsignedWrap(true);
7320 SDValue Ret;
7321 SDValue Chain = Store->getChain();
7322 SDValue BasePtr = Store->getBasePtr();
7323 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7324 VROffset =
7325 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7326 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7327
7328 // Extract subregisters in a vector tuple and store them individually.
7329 for (unsigned i = 0; i < NF; ++i) {
7330 auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
7331 MVT::getScalableVectorVT(MVT::i8, NumElts),
7332 StoredVal, DAG.getVectorIdxConstant(i, DL));
7333 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
7334 MachinePointerInfo(Store->getAddressSpace()),
7335 Store->getOriginalAlign(),
7336 Store->getMemOperand()->getFlags());
7337 Chain = Ret.getValue(0);
7338 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7339 }
7340 return Ret;
7341 }
7342
7343 if (auto V = expandUnalignedRVVStore(Op, DAG))
7344 return V;
7345 if (Op.getOperand(1).getValueType().isFixedLengthVector())
7346 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7347 return Op;
7348 }
7349 case ISD::MLOAD:
7350 case ISD::VP_LOAD:
7351 return lowerMaskedLoad(Op, DAG);
7352 case ISD::MSTORE:
7353 case ISD::VP_STORE:
7354 return lowerMaskedStore(Op, DAG);
7355 case ISD::VECTOR_COMPRESS:
7356 return lowerVectorCompress(Op, DAG);
7357 case ISD::SELECT_CC: {
7358 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
7359 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7360 // into separate SETCC+SELECT just like LegalizeDAG.
7361 SDValue Tmp1 = Op.getOperand(0);
7362 SDValue Tmp2 = Op.getOperand(1);
7363 SDValue True = Op.getOperand(2);
7364 SDValue False = Op.getOperand(3);
7365 EVT VT = Op.getValueType();
7366 SDValue CC = Op.getOperand(4);
7367 EVT CmpVT = Tmp1.getValueType();
7368 EVT CCVT =
7369 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7370 SDLoc DL(Op);
7371 SDValue Cond =
7372 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
7373 return DAG.getSelect(DL, VT, Cond, True, False);
7374 }
7375 case ISD::SETCC: {
7376 MVT OpVT = Op.getOperand(0).getSimpleValueType();
7377 if (OpVT.isScalarInteger()) {
7378 MVT VT = Op.getSimpleValueType();
7379 SDValue LHS = Op.getOperand(0);
7380 SDValue RHS = Op.getOperand(1);
7381 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7382 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
7383 "Unexpected CondCode");
7384
7385 SDLoc DL(Op);
7386
7387 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7388 // convert this to the equivalent of (set(u)ge X, C+1) by using
7389 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7390 // in a register.
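// For example, (setgt X, 5) becomes (xori (slti X, 6), 1).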
7391 if (isa<ConstantSDNode>(RHS)) {
7392 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7393 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7394 // If this is an unsigned compare and the constant is -1, incrementing
7395 // the constant would change behavior. The result should be false.
7396 if (CCVal == ISD::SETUGT && Imm == -1)
7397 return DAG.getConstant(0, DL, VT);
7398 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7399 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7400 SDValue SetCC = DAG.getSetCC(
7401 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
7402 return DAG.getLogicalNOT(DL, SetCC, VT);
7403 }
7404 }
7405
7406 // Not a constant we could handle, swap the operands and condition code to
7407 // SETLT/SETULT.
7408 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7409 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
7410 }
7411
7412 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7413 return SplitVectorOp(Op, DAG);
7414
7415 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7416 }
7417 case ISD::ADD:
7418 case ISD::SUB:
7419 case ISD::MUL:
7420 case ISD::MULHS:
7421 case ISD::MULHU:
7422 case ISD::AND:
7423 case ISD::OR:
7424 case ISD::XOR:
7425 case ISD::SDIV:
7426 case ISD::SREM:
7427 case ISD::UDIV:
7428 case ISD::UREM:
7429 case ISD::BSWAP:
7430 case ISD::CTPOP:
7431 return lowerToScalableOp(Op, DAG);
7432 case ISD::SHL:
7433 case ISD::SRA:
7434 case ISD::SRL:
7435 if (Op.getSimpleValueType().isFixedLengthVector())
7436 return lowerToScalableOp(Op, DAG);
7437 // This can be called for an i32 shift amount that needs to be promoted.
7438 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7439 "Unexpected custom legalisation");
7440 return SDValue();
7441 case ISD::FABS:
7442 case ISD::FNEG:
7443 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7444 return lowerFABSorFNEG(Op, DAG, Subtarget);
7445 [[fallthrough]];
7446 case ISD::FADD:
7447 case ISD::FSUB:
7448 case ISD::FMUL:
7449 case ISD::FDIV:
7450 case ISD::FSQRT:
7451 case ISD::FMA:
7452 case ISD::FMINNUM:
7453 case ISD::FMAXNUM:
7454 if (isPromotedOpNeedingSplit(Op, Subtarget))
7455 return SplitVectorOp(Op, DAG);
7456 [[fallthrough]];
7457 case ISD::AVGFLOORS:
7458 case ISD::AVGFLOORU:
7459 case ISD::AVGCEILS:
7460 case ISD::AVGCEILU:
7461 case ISD::SMIN:
7462 case ISD::SMAX:
7463 case ISD::UMIN:
7464 case ISD::UMAX:
7465 case ISD::UADDSAT:
7466 case ISD::USUBSAT:
7467 case ISD::SADDSAT:
7468 case ISD::SSUBSAT:
7469 return lowerToScalableOp(Op, DAG);
7470 case ISD::ABDS:
7471 case ISD::ABDU: {
7472 SDLoc dl(Op);
7473 EVT VT = Op->getValueType(0);
7474 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7475 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7476 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7477
7478 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7479 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
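// For example, abds(3, -5) = smax(3, -5) - smin(3, -5) = 3 - (-5) = 8.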
7480 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7481 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7482 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7483 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7484 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7485 }
7486 case ISD::ABS:
7487 case ISD::VP_ABS:
7488 return lowerABS(Op, DAG);
7489 case ISD::CTLZ:
7490 case ISD::CTLZ_ZERO_UNDEF:
7491 case ISD::CTTZ:
7492 case ISD::CTTZ_ZERO_UNDEF:
7493 if (Subtarget.hasStdExtZvbb())
7494 return lowerToScalableOp(Op, DAG);
7495 assert(Op.getOpcode() != ISD::CTTZ);
7496 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7497 case ISD::VSELECT:
7498 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7499 case ISD::FCOPYSIGN:
7500 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7501 return lowerFCOPYSIGN(Op, DAG, Subtarget);
7502 if (isPromotedOpNeedingSplit(Op, Subtarget))
7503 return SplitVectorOp(Op, DAG);
7504 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7505 case ISD::STRICT_FADD:
7506 case ISD::STRICT_FSUB:
7507 case ISD::STRICT_FMUL:
7508 case ISD::STRICT_FDIV:
7509 case ISD::STRICT_FSQRT:
7510 case ISD::STRICT_FMA:
7511 if (isPromotedOpNeedingSplit(Op, Subtarget))
7512 return SplitStrictFPVectorOp(Op, DAG);
7513 return lowerToScalableOp(Op, DAG);
7514 case ISD::STRICT_FSETCC:
7515 case ISD::STRICT_FSETCCS:
7516 return lowerVectorStrictFSetcc(Op, DAG);
7517 case ISD::STRICT_FCEIL:
7518 case ISD::STRICT_FRINT:
7519 case ISD::STRICT_FFLOOR:
7520 case ISD::STRICT_FTRUNC:
7521 case ISD::STRICT_FNEARBYINT:
7522 case ISD::STRICT_FROUND:
7523 case ISD::STRICT_FROUNDEVEN:
7524 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7525 case ISD::MGATHER:
7526 case ISD::VP_GATHER:
7527 return lowerMaskedGather(Op, DAG);
7528 case ISD::MSCATTER:
7529 case ISD::VP_SCATTER:
7530 return lowerMaskedScatter(Op, DAG);
7531 case ISD::GET_ROUNDING:
7532 return lowerGET_ROUNDING(Op, DAG);
7533 case ISD::SET_ROUNDING:
7534 return lowerSET_ROUNDING(Op, DAG);
7535 case ISD::EH_DWARF_CFA:
7536 return lowerEH_DWARF_CFA(Op, DAG);
7537 case ISD::VP_MERGE:
7538 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7539 return lowerVPMergeMask(Op, DAG);
7540 [[fallthrough]];
7541 case ISD::VP_SELECT:
7542 case ISD::VP_ADD:
7543 case ISD::VP_SUB:
7544 case ISD::VP_MUL:
7545 case ISD::VP_SDIV:
7546 case ISD::VP_UDIV:
7547 case ISD::VP_SREM:
7548 case ISD::VP_UREM:
7549 case ISD::VP_UADDSAT:
7550 case ISD::VP_USUBSAT:
7551 case ISD::VP_SADDSAT:
7552 case ISD::VP_SSUBSAT:
7553 case ISD::VP_LRINT:
7554 case ISD::VP_LLRINT:
7555 return lowerVPOp(Op, DAG);
7556 case ISD::VP_AND:
7557 case ISD::VP_OR:
7558 case ISD::VP_XOR:
7559 return lowerLogicVPOp(Op, DAG);
7560 case ISD::VP_FADD:
7561 case ISD::VP_FSUB:
7562 case ISD::VP_FMUL:
7563 case ISD::VP_FDIV:
7564 case ISD::VP_FNEG:
7565 case ISD::VP_FABS:
7566 case ISD::VP_SQRT:
7567 case ISD::VP_FMA:
7568 case ISD::VP_FMINNUM:
7569 case ISD::VP_FMAXNUM:
7570 case ISD::VP_FCOPYSIGN:
7571 if (isPromotedOpNeedingSplit(Op, Subtarget))
7572 return SplitVPOp(Op, DAG);
7573 [[fallthrough]];
7574 case ISD::VP_SRA:
7575 case ISD::VP_SRL:
7576 case ISD::VP_SHL:
7577 return lowerVPOp(Op, DAG);
7578 case ISD::VP_IS_FPCLASS:
7579 return LowerIS_FPCLASS(Op, DAG);
7580 case ISD::VP_SIGN_EXTEND:
7581 case ISD::VP_ZERO_EXTEND:
7582 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7583 return lowerVPExtMaskOp(Op, DAG);
7584 return lowerVPOp(Op, DAG);
7585 case ISD::VP_TRUNCATE:
7586 return lowerVectorTruncLike(Op, DAG);
7587 case ISD::VP_FP_EXTEND:
7588 case ISD::VP_FP_ROUND:
7589 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7590 case ISD::VP_SINT_TO_FP:
7591 case ISD::VP_UINT_TO_FP:
7592 if (Op.getValueType().isVector() &&
7593 ((Op.getValueType().getScalarType() == MVT::f16 &&
7594 (Subtarget.hasVInstructionsF16Minimal() &&
7595 !Subtarget.hasVInstructionsF16())) ||
7596 Op.getValueType().getScalarType() == MVT::bf16)) {
7597 if (isPromotedOpNeedingSplit(Op, Subtarget))
7598 return SplitVectorOp(Op, DAG);
7599 // int -> f32
7600 SDLoc DL(Op);
7601 MVT NVT =
7602 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7603 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7604 // f32 -> [b]f16
7605 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7606 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7607 }
7608 [[fallthrough]];
7609 case ISD::VP_FP_TO_SINT:
7610 case ISD::VP_FP_TO_UINT:
7611 if (SDValue Op1 = Op.getOperand(0);
7612 Op1.getValueType().isVector() &&
7613 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7614 (Subtarget.hasVInstructionsF16Minimal() &&
7615 !Subtarget.hasVInstructionsF16())) ||
7616 Op1.getValueType().getScalarType() == MVT::bf16)) {
7617 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7618 return SplitVectorOp(Op, DAG);
7619 // [b]f16 -> f32
7620 SDLoc DL(Op);
7621 MVT NVT = MVT::getVectorVT(MVT::f32,
7622 Op1.getValueType().getVectorElementCount());
7623 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7624 // f32 -> int
7625 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7626 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7627 }
7628 return lowerVPFPIntConvOp(Op, DAG);
7629 case ISD::VP_SETCC:
7630 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7631 return SplitVPOp(Op, DAG);
7632 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7633 return lowerVPSetCCMaskOp(Op, DAG);
7634 [[fallthrough]];
7635 case ISD::VP_SMIN:
7636 case ISD::VP_SMAX:
7637 case ISD::VP_UMIN:
7638 case ISD::VP_UMAX:
7639 case ISD::VP_BITREVERSE:
7640 case ISD::VP_BSWAP:
7641 return lowerVPOp(Op, DAG);
7642 case ISD::VP_CTLZ:
7643 case ISD::VP_CTLZ_ZERO_UNDEF:
7644 if (Subtarget.hasStdExtZvbb())
7645 return lowerVPOp(Op, DAG);
7646 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7647 case ISD::VP_CTTZ:
7648 case ISD::VP_CTTZ_ZERO_UNDEF:
7649 if (Subtarget.hasStdExtZvbb())
7650 return lowerVPOp(Op, DAG);
7651 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7652 case ISD::VP_CTPOP:
7653 return lowerVPOp(Op, DAG);
7654 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7655 return lowerVPStridedLoad(Op, DAG);
7656 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7657 return lowerVPStridedStore(Op, DAG);
7658 case ISD::VP_FCEIL:
7659 case ISD::VP_FFLOOR:
7660 case ISD::VP_FRINT:
7661 case ISD::VP_FNEARBYINT:
7662 case ISD::VP_FROUND:
7663 case ISD::VP_FROUNDEVEN:
7664 case ISD::VP_FROUNDTOZERO:
7665 if (isPromotedOpNeedingSplit(Op, Subtarget))
7666 return SplitVPOp(Op, DAG);
7667 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7668 case ISD::VP_FMAXIMUM:
7669 case ISD::VP_FMINIMUM:
7670 if (isPromotedOpNeedingSplit(Op, Subtarget))
7671 return SplitVPOp(Op, DAG);
7672 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7673 case ISD::EXPERIMENTAL_VP_SPLICE:
7674 return lowerVPSpliceExperimental(Op, DAG);
7675 case ISD::EXPERIMENTAL_VP_REVERSE:
7676 return lowerVPReverseExperimental(Op, DAG);
7677 case ISD::EXPERIMENTAL_VP_SPLAT:
7678 return lowerVPSplatExperimental(Op, DAG);
7679 case ISD::CLEAR_CACHE: {
7680 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7681 "llvm.clear_cache only needs custom lower on Linux targets");
7682 SDLoc DL(Op);
7683 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7684 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7685 Op.getOperand(2), Flags, DL);
7686 }
7687 case ISD::INIT_TRAMPOLINE:
7688 return lowerINIT_TRAMPOLINE(Op, DAG);
7689 case ISD::ADJUST_TRAMPOLINE:
7690 return lowerADJUST_TRAMPOLINE(Op, DAG);
7691 }
7692}
7693
7694SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7695 SDValue Start, SDValue End,
7696 SDValue Flags, SDLoc DL) const {
7697 MakeLibCallOptions CallOptions;
7698 std::pair<SDValue, SDValue> CallResult =
7699 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7700 {Start, End, Flags}, CallOptions, DL, InChain);
7701
7702 // This function returns void so only the out chain matters.
7703 return CallResult.second;
7704}
7705
7706SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
7707 SelectionDAG &DAG) const {
7708 if (!Subtarget.is64Bit())
7709 llvm::report_fatal_error("Trampolines only implemented for RV64");
7710
7711 // Create an MCCodeEmitter to encode instructions.
7712 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
7713 assert(TLO);
7714 MCContext &MCCtx = TLO->getContext();
7715
7716 std::unique_ptr<MCCodeEmitter> CodeEmitter(
7717 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
7718
7719 SDValue Root = Op.getOperand(0);
7720 SDValue Trmp = Op.getOperand(1); // trampoline
7721 SDLoc dl(Op);
7722
7723 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7724
7725 // We store in the trampoline buffer the following instructions and data.
7726 // Offset:
7727 // 0: auipc t2, 0
7728 // 4: ld t0, 24(t2)
7729 // 8: ld t2, 16(t2)
7730 // 12: jalr t0
7731 // 16: <StaticChainOffset>
7732 // 24: <FunctionAddressOffset>
7733 // 32:
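// In total the trampoline occupies 32 bytes: 16 bytes of code followed by two
// 8-byte data slots that the instructions above load at run time.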
7734
7735 constexpr unsigned StaticChainOffset = 16;
7736 constexpr unsigned FunctionAddressOffset = 24;
7737
7738 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
7739 assert(STI);
7740 auto GetEncoding = [&](const MCInst &MC) {
7741 SmallVector<char, 32> CB;
7742 SmallVector<MCFixup> Fixups;
7743 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
7744 uint32_t Encoding = support::endian::read32le(CB.data());
7745 return Encoding;
7746 };
7747
7748 SDValue OutChains[6];
7749
7750 uint32_t Encodings[] = {
7751 // auipc t2, 0
7752 // Loads the current PC into t2.
7753 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
7754 // ld t0, 24(t2)
7755 // Loads the function address into t0. Note that we are using offsets
7756 // pc-relative to the first instruction of the trampoline.
7757 GetEncoding(
7758 MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
7759 FunctionAddressOffset)),
7760 // ld t2, 16(t2)
7761 // Load the value of the static chain.
7762 GetEncoding(
7763 MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
7764 StaticChainOffset)),
7765 // jalr t0
7766 // Jump to the function.
7767 GetEncoding(MCInstBuilder(RISCV::JALR)
7768 .addReg(RISCV::X0)
7769 .addReg(RISCV::X5)
7770 .addImm(0))};
7771
7772 // Store encoded instructions.
7773 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
7774 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7775 DAG.getConstant(Idx * 4, dl, MVT::i64))
7776 : Trmp;
7777 OutChains[Idx] = DAG.getTruncStore(
7778 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
7779 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32);
7780 }
7781
7782 // Now store the variable part of the trampoline.
7783 SDValue FunctionAddress = Op.getOperand(2);
7784 SDValue StaticChain = Op.getOperand(3);
7785
7786 // Store the given static chain and function pointer in the trampoline buffer.
7787 struct OffsetValuePair {
7788 const unsigned Offset;
7789 const SDValue Value;
7790 SDValue Addr = SDValue(); // Used to cache the address.
7791 } OffsetValues[] = {
7792 {StaticChainOffset, StaticChain},
7793 {FunctionAddressOffset, FunctionAddress},
7794 };
7795 for (auto [Idx, OffsetValue] : llvm::enumerate(OffsetValues)) {
7796 SDValue Addr =
7797 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7798 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
7799 OffsetValue.Addr = Addr;
7800 OutChains[Idx + 4] =
7801 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
7802 MachinePointerInfo(TrmpAddr, OffsetValue.Offset));
7803 }
7804
7805 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
7806
7807 // The end of the trampoline's instructions coincides with the static chain
7808 // slot address computed above.
7809 SDValue EndOfTrmp = OffsetValues[0].Addr;
7810
7811 // Call clear cache on the trampoline instructions.
7812 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
7813 Trmp, EndOfTrmp);
7814
7815 return Chain;
7816}
7817
7818SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
7819 SelectionDAG &DAG) const {
7820 if (!Subtarget.is64Bit())
7821 llvm::report_fatal_error("Trampolines only implemented for RV64");
7822
7823 return Op.getOperand(0);
7824}
7825
7826 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7827 SelectionDAG &DAG, unsigned Flags) {
7828 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7829}
7830
7831 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7832 SelectionDAG &DAG, unsigned Flags) {
7833 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7834 Flags);
7835}
7836
7837 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7838 SelectionDAG &DAG, unsigned Flags) {
7839 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7840 N->getOffset(), Flags);
7841}
7842
7843 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7844 SelectionDAG &DAG, unsigned Flags) {
7845 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7846}
7847
7848 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
7849 EVT Ty, SelectionDAG &DAG) {
7850 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
7851 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7852 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7853 return DAG.getLoad(
7854 Ty, DL, DAG.getEntryNode(), LC,
7855 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7856}
7857
7858 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
7859 EVT Ty, SelectionDAG &DAG) {
7860 RISCVConstantPoolValue *CPV =
7861 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
7862 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7863 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7864 return DAG.getLoad(
7865 Ty, DL, DAG.getEntryNode(), LC,
7866 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7867}
7868
7869template <class NodeTy>
7870SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7871 bool IsLocal, bool IsExternWeak) const {
7872 SDLoc DL(N);
7873 EVT Ty = getPointerTy(DAG.getDataLayout());
7874
7875 // When HWASAN is used and tagging of global variables is enabled
7876 // they should be accessed via the GOT, since the tagged address of a global
7877 // is incompatible with existing code models. This also applies to non-pic
7878 // mode.
7879 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7880 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7881 if (IsLocal && !Subtarget.allowTaggedGlobals())
7882 // Use PC-relative addressing to access the symbol. This generates the
7883 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7884 // %pcrel_lo(auipc)).
7885 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7886
7887 // Use PC-relative addressing to access the GOT for this symbol, then load
7888 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7889 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7890 SDValue Load =
7891 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7892 MachineFunction &MF = DAG.getMachineFunction();
7893 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7894 MachinePointerInfo::getGOT(MF),
7895 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7896 MachineMemOperand::MOInvariant,
7897 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7898 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7899 return Load;
7900 }
7901
7902 switch (getTargetMachine().getCodeModel()) {
7903 default:
7904 report_fatal_error("Unsupported code model for lowering");
7905 case CodeModel::Small: {
7906 // Generate a sequence for accessing addresses within the first 2 GiB of
7907 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7908 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7909 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7910 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7911 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7912 }
7913 case CodeModel::Medium: {
7914 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7915 if (IsExternWeak) {
7916 // An extern weak symbol may be undefined, i.e. have value 0, which may
7917 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7918 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7919 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7920 SDValue Load =
7921 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7922 MachineFunction &MF = DAG.getMachineFunction();
7923 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7924 MachinePointerInfo::getGOT(MF),
7925 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7926 MachineMemOperand::MOInvariant,
7927 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7928 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7929 return Load;
7930 }
7931
7932 // Generate a sequence for accessing addresses within any 2GiB range within
7933 // the address space. This generates the pattern (PseudoLLA sym), which
7934 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
7935 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7936 }
7937 case CodeModel::Large: {
7938 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
7939 return getLargeGlobalAddress(G, DL, Ty, DAG);
7940
7941 // Use PC-relative addressing for other node types.
7942 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7943 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7944 }
7945 }
7946}
7947
7948SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
7949 SelectionDAG &DAG) const {
7950 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
7951 assert(N->getOffset() == 0 && "unexpected offset in global node");
7952 const GlobalValue *GV = N->getGlobal();
7953 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
7954}
7955
7956SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
7957 SelectionDAG &DAG) const {
7958 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
7959
7960 return getAddr(N, DAG);
7961}
7962
7963SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
7964 SelectionDAG &DAG) const {
7965 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
7966
7967 return getAddr(N, DAG);
7968}
7969
7970SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
7971 SelectionDAG &DAG) const {
7972 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
7973
7974 return getAddr(N, DAG);
7975}
7976
7977SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
7978 SelectionDAG &DAG,
7979 bool UseGOT) const {
7980 SDLoc DL(N);
7981 EVT Ty = getPointerTy(DAG.getDataLayout());
7982 const GlobalValue *GV = N->getGlobal();
7983 MVT XLenVT = Subtarget.getXLenVT();
7984
7985 if (UseGOT) {
7986 // Use PC-relative addressing to access the GOT for this TLS symbol, then
7987 // load the address from the GOT and add the thread pointer. This generates
7988 // the pattern (PseudoLA_TLS_IE sym), which expands to
7989 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
7990 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
7991 SDValue Load =
7992 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
7993 MachineFunction &MF = DAG.getMachineFunction();
7994 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7995 MachinePointerInfo::getGOT(MF),
7996 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7997 MachineMemOperand::MOInvariant,
7998 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7999 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8000
8001 // Add the thread pointer.
8002 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8003 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
8004 }
8005
8006 // Generate a sequence for accessing the address relative to the thread
8007 // pointer, with the appropriate adjustment for the thread pointer offset.
8008 // This generates the pattern
8009 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
8010 SDValue AddrHi =
8011 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
8012 SDValue AddrAdd =
8013 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
8014 SDValue AddrLo =
8015 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
8016
8017 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8018 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8019 SDValue MNAdd =
8020 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
8021 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
8022}
8023
8024SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
8025 SelectionDAG &DAG) const {
8026 SDLoc DL(N);
8027 EVT Ty = getPointerTy(DAG.getDataLayout());
8028 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
8029 const GlobalValue *GV = N->getGlobal();
8030
8031 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8032 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
8033 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
8034 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8035 SDValue Load =
8036 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
8037
8038 // Prepare argument list to generate call.
8039 ArgListTy Args;
8040 ArgListEntry Entry;
8041 Entry.Node = Load;
8042 Entry.Ty = CallTy;
8043 Args.push_back(Entry);
8044
8045 // Setup call to __tls_get_addr.
8046 TargetLowering::CallLoweringInfo CLI(DAG);
8047 CLI.setDebugLoc(DL)
8048 .setChain(DAG.getEntryNode())
8049 .setLibCallee(CallingConv::C, CallTy,
8050 DAG.getExternalSymbol("__tls_get_addr", Ty),
8051 std::move(Args));
8052
8053 return LowerCallTo(CLI).first;
8054}
8055
8056SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8057 SelectionDAG &DAG) const {
8058 SDLoc DL(N);
8059 EVT Ty = getPointerTy(DAG.getDataLayout());
8060 const GlobalValue *GV = N->getGlobal();
8061
8062 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8063 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8064 //
8065 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8066 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8067 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8068 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8069 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8070 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
8071}
8072
8073SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
8074 SelectionDAG &DAG) const {
8075 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8076 assert(N->getOffset() == 0 && "unexpected offset in global node");
8077
8078 if (DAG.getTarget().useEmulatedTLS())
8079 return LowerToTLSEmulatedModel(N, DAG);
8080
8081 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
8082
8083 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
8084 CallingConv::GHC)
8085 report_fatal_error("In GHC calling convention TLS is not supported");
8086
8087 SDValue Addr;
8088 switch (Model) {
8089 case TLSModel::LocalExec:
8090 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
8091 break;
8092 case TLSModel::InitialExec:
8093 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
8094 break;
8095 case TLSModel::LocalDynamic:
8096 case TLSModel::GeneralDynamic:
8097 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
8098 : getDynamicTLSAddr(N, DAG);
8099 break;
8100 }
8101
8102 return Addr;
8103}
8104
8105// Return true if Val is equal to (setcc LHS, RHS, CC).
8106// Return false if Val is the inverse of (setcc LHS, RHS, CC).
8107// Otherwise, return std::nullopt.
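// For example, matchSetCC(a, b, SETLT, (setcc a, b, SETGE)) returns false,
// since SETGE is the inverse of SETLT for the same operands.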
8108static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
8109 ISD::CondCode CC, SDValue Val) {
8110 assert(Val->getOpcode() == ISD::SETCC);
8111 SDValue LHS2 = Val.getOperand(0);
8112 SDValue RHS2 = Val.getOperand(1);
8113 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
8114
8115 if (LHS == LHS2 && RHS == RHS2) {
8116 if (CC == CC2)
8117 return true;
8118 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8119 return false;
8120 } else if (LHS == RHS2 && RHS == LHS2) {
8121 CC2 = ISD::getSetCCSwappedOperands(CC2);
8122 if (CC == CC2)
8123 return true;
8124 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8125 return false;
8126 }
8127
8128 return std::nullopt;
8129}
8130
8131 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
8132 const RISCVSubtarget &Subtarget) {
8133 SDValue CondV = N->getOperand(0);
8134 SDValue TrueV = N->getOperand(1);
8135 SDValue FalseV = N->getOperand(2);
8136 MVT VT = N->getSimpleValueType(0);
8137 SDLoc DL(N);
8138
8139 if (!Subtarget.hasConditionalMoveFusion()) {
8140 // (select c, -1, y) -> -c | y
8141 if (isAllOnesConstant(TrueV)) {
8142 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8143 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
8144 }
8145 // (select c, y, -1) -> (c-1) | y
8146 if (isAllOnesConstant(FalseV)) {
8147 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8148 DAG.getAllOnesConstant(DL, VT));
8149 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
8150 }
8151
8152 // (select c, 0, y) -> (c-1) & y
8153 if (isNullConstant(TrueV)) {
8154 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8155 DAG.getAllOnesConstant(DL, VT));
8156 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
8157 }
8158 // (select c, y, 0) -> -c & y
8159 if (isNullConstant(FalseV)) {
8160 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8161 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
8162 }
8163 }
8164
8165 // select c, ~x, x --> xor -c, x
8166 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8167 const APInt &TrueVal = TrueV->getAsAPIntVal();
8168 const APInt &FalseVal = FalseV->getAsAPIntVal();
8169 if (~TrueVal == FalseVal) {
8170 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8171 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
8172 }
8173 }
8174
8175 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
8176 // when both truev and falsev are also setcc.
8177 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
8178 FalseV.getOpcode() == ISD::SETCC) {
8179 SDValue LHS = CondV.getOperand(0);
8180 SDValue RHS = CondV.getOperand(1);
8181 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8182
8183 // (select x, x, y) -> x | y
8184 // (select !x, x, y) -> x & y
8185 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
8186 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
8187 DAG.getFreeze(FalseV));
8188 }
8189 // (select x, y, x) -> x & y
8190 // (select !x, y, x) -> x | y
8191 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
8192 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
8193 DAG.getFreeze(TrueV), FalseV);
8194 }
8195 }
8196
8197 return SDValue();
8198}
8199
8200// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
8201// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
8202// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
8203// being `0` or `-1`. In such cases we can replace `select` with `and`.
8204// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
8205// than `c0`?
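// For example, (add (select cond, x, -1), 1) becomes
// (select cond, (add x, 1), 0), and the all-zeros false arm then lets the
// select itself be replaced with an and/czero rather than a branch.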
8206static SDValue
8207 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
8208 const RISCVSubtarget &Subtarget) {
8209 if (Subtarget.hasShortForwardBranchOpt())
8210 return SDValue();
8211
8212 unsigned SelOpNo = 0;
8213 SDValue Sel = BO->getOperand(0);
8214 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
8215 SelOpNo = 1;
8216 Sel = BO->getOperand(1);
8217 }
8218
8219 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
8220 return SDValue();
8221
8222 unsigned ConstSelOpNo = 1;
8223 unsigned OtherSelOpNo = 2;
8224 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
8225 ConstSelOpNo = 2;
8226 OtherSelOpNo = 1;
8227 }
8228 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
8229 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
8230 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
8231 return SDValue();
8232
8233 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
8234 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
8235 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
8236 return SDValue();
8237
8238 SDLoc DL(Sel);
8239 EVT VT = BO->getValueType(0);
8240
8241 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
8242 if (SelOpNo == 1)
8243 std::swap(NewConstOps[0], NewConstOps[1]);
8244
8245 SDValue NewConstOp =
8246 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
8247 if (!NewConstOp)
8248 return SDValue();
8249
8250 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
8251 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
8252 return SDValue();
8253
8254 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
8255 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
8256 if (SelOpNo == 1)
8257 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
8258 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
8259
8260 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
8261 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
8262 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
8263}
8264
8265SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
8266 SDValue CondV = Op.getOperand(0);
8267 SDValue TrueV = Op.getOperand(1);
8268 SDValue FalseV = Op.getOperand(2);
8269 SDLoc DL(Op);
8270 MVT VT = Op.getSimpleValueType();
8271 MVT XLenVT = Subtarget.getXLenVT();
8272
8273 // Lower vector SELECTs to VSELECTs by splatting the condition.
8274 if (VT.isVector()) {
8275 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
8276 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
8277 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
8278 }
8279
8280 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
8281 // nodes to implement the SELECT. Performing the lowering here allows for
8282 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
8283 // sequence or RISCVISD::SELECT_CC node (branch-based select).
8284 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
8285 VT.isScalarInteger()) {
8286 // (select c, t, 0) -> (czero_eqz t, c)
8287 if (isNullConstant(FalseV))
8288 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
8289 // (select c, 0, f) -> (czero_nez f, c)
8290 if (isNullConstant(TrueV))
8291 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
8292
8293 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
8294 if (TrueV.getOpcode() == ISD::AND &&
8295 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
8296 return DAG.getNode(
8297 ISD::OR, DL, VT, TrueV,
8298 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8299 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
8300 if (FalseV.getOpcode() == ISD::AND &&
8301 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
8302 return DAG.getNode(
8303 ISD::OR, DL, VT, FalseV,
8304 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
8305
8306 // Try some other optimizations before falling back to generic lowering.
8307 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8308 return V;
8309
8310 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
8311 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
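// E.g. (select c, 1, 1024) becomes (add (czero_nez 1023, c), 1), which
// evaluates to 1 when c is non-zero and to 1024 when c is zero.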
8312 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8313 const APInt &TrueVal = TrueV->getAsAPIntVal();
8314 const APInt &FalseVal = FalseV->getAsAPIntVal();
8315 const int TrueValCost = RISCVMatInt::getIntMatCost(
8316 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8317 const int FalseValCost = RISCVMatInt::getIntMatCost(
8318 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8319 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
8320 SDValue LHSVal = DAG.getConstant(
8321 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
8322 SDValue RHSVal =
8323 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
8324 SDValue CMOV =
8325 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
8326 DL, VT, LHSVal, CondV);
8327 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
8328 }
8329
8330 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
8331 // Unless we have the short forward branch optimization.
8332 if (!Subtarget.hasConditionalMoveFusion())
8333 return DAG.getNode(
8334 ISD::OR, DL, VT,
8335 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
8336 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8337 }
8338
8339 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8340 return V;
8341
8342 if (Op.hasOneUse()) {
8343 unsigned UseOpc = Op->user_begin()->getOpcode();
8344 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
8345 SDNode *BinOp = *Op->user_begin();
8346 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
8347 DAG, Subtarget)) {
8348 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
8349 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
8350 // may return a constant node and cause a crash in lowerSELECT.
8351 if (NewSel.getOpcode() == ISD::SELECT)
8352 return lowerSELECT(NewSel, DAG);
8353 return NewSel;
8354 }
8355 }
8356 }
8357
8358 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
8359 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
8360 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
8361 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
8362 if (FPTV && FPFV) {
8363 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
8364 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
8365 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
8366 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
8367 DAG.getConstant(1, DL, XLenVT));
8368 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
8369 }
8370 }
8371
8372 // If the condition is not an integer SETCC which operates on XLenVT, we need
8373 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
8374 // (select condv, truev, falsev)
8375 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
8376 if (CondV.getOpcode() != ISD::SETCC ||
8377 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
8378 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8379 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
8380
8381 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
8382
8383 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8384 }
8385
8386 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
8387 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
8388 // advantage of the integer compare+branch instructions. i.e.:
8389 // (select (setcc lhs, rhs, cc), truev, falsev)
8390 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
8391 SDValue LHS = CondV.getOperand(0);
8392 SDValue RHS = CondV.getOperand(1);
8393 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8394
8395 // Special case for a select of 2 constants that have a difference of 1.
8396 // Normally this is done by DAGCombine, but if the select is introduced by
8397 // type legalization or op legalization, we miss it. Restricting to SETLT
8398 // case for now because that is what signed saturating add/sub need.
8399 // FIXME: We don't need the condition to be SETLT or even a SETCC,
8400 // but we would probably want to swap the true/false values if the condition
8401 // is SETGE/SETLE to avoid an XORI.
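// E.g. (select (setlt x, y), 6, 5) becomes (add (setlt x, y), 5), since the
// setcc result is 0 or 1 and the add materialises either constant.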
8402 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
8403 CCVal == ISD::SETLT) {
8404 const APInt &TrueVal = TrueV->getAsAPIntVal();
8405 const APInt &FalseVal = FalseV->getAsAPIntVal();
8406 if (TrueVal - 1 == FalseVal)
8407 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
8408 if (TrueVal + 1 == FalseVal)
8409 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
8410 }
8411
8412 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8413 // 1 < x ? x : 1 -> 0 < x ? x : 1
8414 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
8415 RHS == TrueV && LHS == FalseV) {
8416 LHS = DAG.getConstant(0, DL, VT);
8417 // 0 <u x is the same as x != 0.
8418 if (CCVal == ISD::SETULT) {
8419 std::swap(LHS, RHS);
8420 CCVal = ISD::SETNE;
8421 }
8422 }
8423
8424 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
8425 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
8426 RHS == FalseV) {
8427 RHS = DAG.getConstant(0, DL, VT);
8428 }
8429
8430 SDValue TargetCC = DAG.getCondCode(CCVal);
8431
8432 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
8433 // (select (setcc lhs, rhs, CC), constant, falsev)
8434 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
8435 std::swap(TrueV, FalseV);
8436 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
8437 }
8438
8439 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
8440 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8441}
8442
8443SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
8444 SDValue CondV = Op.getOperand(1);
8445 SDLoc DL(Op);
8446 MVT XLenVT = Subtarget.getXLenVT();
8447
8448 if (CondV.getOpcode() == ISD::SETCC &&
8449 CondV.getOperand(0).getValueType() == XLenVT) {
8450 SDValue LHS = CondV.getOperand(0);
8451 SDValue RHS = CondV.getOperand(1);
8452 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8453
8454 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8455
8456 SDValue TargetCC = DAG.getCondCode(CCVal);
8457 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8458 LHS, RHS, TargetCC, Op.getOperand(2));
8459 }
8460
8461 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8462 CondV, DAG.getConstant(0, DL, XLenVT),
8463 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
8464}
8465
8466SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
8467 MachineFunction &MF = DAG.getMachineFunction();
8468 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
8469
8470 SDLoc DL(Op);
8471 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
8472 getPointerTy(MF.getDataLayout()));
8473
8474 // vastart just stores the address of the VarArgsFrameIndex slot into the
8475 // memory location argument.
8476 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8477 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
8478 MachinePointerInfo(SV));
8479}
8480
8481SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
8482 SelectionDAG &DAG) const {
8483 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8484 MachineFunction &MF = DAG.getMachineFunction();
8485 MachineFrameInfo &MFI = MF.getFrameInfo();
8486 MFI.setFrameAddressIsTaken(true);
8487 Register FrameReg = RI.getFrameRegister(MF);
8488 int XLenInBytes = Subtarget.getXLen() / 8;
8489
8490 EVT VT = Op.getValueType();
8491 SDLoc DL(Op);
8492 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
8493 unsigned Depth = Op.getConstantOperandVal(0);
8494 while (Depth--) {
8495 int Offset = -(XLenInBytes * 2);
8496 SDValue Ptr = DAG.getNode(
8497 ISD::ADD, DL, VT, FrameAddr,
8498 DAG.getSignedConstant(Offset, DL, VT));
8499 FrameAddr =
8500 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
8501 }
8502 return FrameAddr;
8503}
8504
8505SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
8506 SelectionDAG &DAG) const {
8507 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8508 MachineFunction &MF = DAG.getMachineFunction();
8509 MachineFrameInfo &MFI = MF.getFrameInfo();
8510 MFI.setReturnAddressIsTaken(true);
8511 MVT XLenVT = Subtarget.getXLenVT();
8512 int XLenInBytes = Subtarget.getXLen() / 8;
8513
8514 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
8515 return SDValue();
8516
8517 EVT VT = Op.getValueType();
8518 SDLoc DL(Op);
8519 unsigned Depth = Op.getConstantOperandVal(0);
8520 if (Depth) {
8521 int Off = -XLenInBytes;
8522 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
8523 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
8524 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
8525 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
8526 MachinePointerInfo());
8527 }
8528
8529 // Return the value of the return address register, marking it an implicit
8530 // live-in.
8531 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
8532 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
8533}
8534
8535SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
8536 SelectionDAG &DAG) const {
8537 SDLoc DL(Op);
8538 SDValue Lo = Op.getOperand(0);
8539 SDValue Hi = Op.getOperand(1);
8540 SDValue Shamt = Op.getOperand(2);
8541 EVT VT = Lo.getValueType();
8542
8543 // if Shamt-XLEN < 0: // Shamt < XLEN
8544 // Lo = Lo << Shamt
8545 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8546 // else:
8547 // Lo = 0
8548 // Hi = Lo << (Shamt-XLEN)
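// For example, with XLEN=32 and Shamt=40 the else branch applies:
// Lo = 0 and Hi = Lo << 8, i.e. the low word is shifted entirely into the
// high word.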
8549
8550 SDValue Zero = DAG.getConstant(0, DL, VT);
8551 SDValue One = DAG.getConstant(1, DL, VT);
8552 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8553 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8554 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8555 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8556
8557 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
8558 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
8559 SDValue ShiftRightLo =
8560 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8561 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
8562 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
8563 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
8564
8565 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8566
8567 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
8568 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8569
8570 SDValue Parts[2] = {Lo, Hi};
8571 return DAG.getMergeValues(Parts, DL);
8572}
8573
8574SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8575 bool IsSRA) const {
8576 SDLoc DL(Op);
8577 SDValue Lo = Op.getOperand(0);
8578 SDValue Hi = Op.getOperand(1);
8579 SDValue Shamt = Op.getOperand(2);
8580 EVT VT = Lo.getValueType();
8581
8582 // SRA expansion:
8583 // if Shamt-XLEN < 0: // Shamt < XLEN
8584 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8585 // Hi = Hi >>s Shamt
8586 // else:
8587 // Lo = Hi >>s (Shamt-XLEN);
8588 // Hi = Hi >>s (XLEN-1)
8589 //
8590 // SRL expansion:
8591 // if Shamt-XLEN < 0: // Shamt < XLEN
8592 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8593 // Hi = Hi >>u Shamt
8594 // else:
8595 // Lo = Hi >>u (Shamt-XLEN);
8596 // Hi = 0;
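// For example, an SRL with XLEN=32 and Shamt=3 takes the first branch:
// Lo = (Lo >>u 3) | ((Hi << 1) << 28), which moves the low 3 bits of Hi into
// the top of Lo, and Hi = Hi >>u 3.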
8597
8598 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8599
8600 SDValue Zero = DAG.getConstant(0, DL, VT);
8601 SDValue One = DAG.getConstant(1, DL, VT);
8602 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8603 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8604 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8605 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8606
8607 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8608 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8609 SDValue ShiftLeftHi =
8610 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8611 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8612 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8613 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8614 SDValue HiFalse =
8615 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8616
8617 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8618
8619 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8620 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8621
8622 SDValue Parts[2] = {Lo, Hi};
8623 return DAG.getMergeValues(Parts, DL);
8624}
8625
8626// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8627// legal equivalently-sized i8 type, so we can use that as a go-between.
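// For a non-constant scalar x this is roughly:
// (splat_vector vXi1, x) -> (setcc (splat_vector vXi8, (and x, 1)), 0, ne)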
8628SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8629 SelectionDAG &DAG) const {
8630 SDLoc DL(Op);
8631 MVT VT = Op.getSimpleValueType();
8632 SDValue SplatVal = Op.getOperand(0);
8633 // All-zeros or all-ones splats are handled specially.
8634 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8635 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8636 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8637 }
8638 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8639 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8640 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8641 }
8642 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8643 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8644 DAG.getConstant(1, DL, SplatVal.getValueType()));
8645 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8646 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8647 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8648}
8649
8650// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8651// illegal (currently only vXi64 RV32).
8652// FIXME: We could also catch non-constant sign-extended i32 values and lower
8653// them to VMV_V_X_VL.
8654SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8655 SelectionDAG &DAG) const {
8656 SDLoc DL(Op);
8657 MVT VecVT = Op.getSimpleValueType();
8658 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8659 "Unexpected SPLAT_VECTOR_PARTS lowering");
8660
8661 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8662 SDValue Lo = Op.getOperand(0);
8663 SDValue Hi = Op.getOperand(1);
8664
8665 MVT ContainerVT = VecVT;
8666 if (VecVT.isFixedLengthVector())
8667 ContainerVT = getContainerForFixedLengthVector(VecVT);
8668
8669 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8670
8671 SDValue Res =
8672 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8673
8674 if (VecVT.isFixedLengthVector())
8675 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8676
8677 return Res;
8678}
8679
8680// Custom-lower extensions from mask vectors by using a vselect either with 1
8681// for zero/any-extension or -1 for sign-extension:
8682// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8683// Note that any-extension is lowered identically to zero-extension.
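// For example, (zext vXi1 %m to vXi32) becomes (vselect %m, splat(1), splat(0));
// the sign-extending form uses splat(-1) as the true value.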
8684SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8685 int64_t ExtTrueVal) const {
8686 SDLoc DL(Op);
8687 MVT VecVT = Op.getSimpleValueType();
8688 SDValue Src = Op.getOperand(0);
8689 // Only custom-lower extensions from mask types
8690 assert(Src.getValueType().isVector() &&
8691 Src.getValueType().getVectorElementType() == MVT::i1);
8692
8693 if (VecVT.isScalableVector()) {
8694 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8695 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
8696 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8697 }
8698
8699 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8700 MVT I1ContainerVT =
8701 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8702
8703 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8704
8705 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8706
8707 MVT XLenVT = Subtarget.getXLenVT();
8708 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8709 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
8710
8711 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8712 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8713 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8714 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8715 SDValue Select =
8716 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8717 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8718
8719 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8720}
8721
8722SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8723 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8724 MVT ExtVT = Op.getSimpleValueType();
8725 // Only custom-lower extensions from fixed-length vector types.
8726 if (!ExtVT.isFixedLengthVector())
8727 return Op;
8728 MVT VT = Op.getOperand(0).getSimpleValueType();
8729 // Grab the canonical container type for the extended type. Infer the smaller
8730 // type from that to ensure the same number of vector elements, as we know
8731 // the LMUL will be sufficient to hold the smaller type.
8732 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8733 // Get the extended container type manually to ensure the same number of
8734 // vector elements between source and dest.
8735 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8736 ContainerExtVT.getVectorElementCount());
8737
8738 SDValue Op1 =
8739 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8740
8741 SDLoc DL(Op);
8742 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8743
8744 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8745
8746 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8747}
8748
8749// Custom-lower truncations from vectors to mask vectors by using a mask and a
8750// setcc operation:
8751// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8752SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8753 SelectionDAG &DAG) const {
8754 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8755 SDLoc DL(Op);
8756 EVT MaskVT = Op.getValueType();
8757 // Only expect to custom-lower truncations to mask types
8758 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8759 "Unexpected type for vector mask lowering");
8760 SDValue Src = Op.getOperand(0);
8761 MVT VecVT = Src.getSimpleValueType();
8762 SDValue Mask, VL;
8763 if (IsVPTrunc) {
8764 Mask = Op.getOperand(1);
8765 VL = Op.getOperand(2);
8766 }
8767 // If this is a fixed vector, we need to convert it to a scalable vector.
8768 MVT ContainerVT = VecVT;
8769
8770 if (VecVT.isFixedLengthVector()) {
8771 ContainerVT = getContainerForFixedLengthVector(VecVT);
8772 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8773 if (IsVPTrunc) {
8774 MVT MaskContainerVT =
8775 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8776 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8777 }
8778 }
8779
8780 if (!IsVPTrunc) {
8781 std::tie(Mask, VL) =
8782 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8783 }
8784
8785 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8786 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8787
8788 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8789 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8790 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8791 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8792
8793 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8794 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8795 DAG.getUNDEF(ContainerVT), Mask, VL);
8796 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8797 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8798 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8799 if (MaskVT.isFixedLengthVector())
8800 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8801 return Trunc;
8802}
8803
8804SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8805 SelectionDAG &DAG) const {
8806 unsigned Opc = Op.getOpcode();
8807 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8808 SDLoc DL(Op);
8809
8810 MVT VT = Op.getSimpleValueType();
8811 // Only custom-lower vector truncates
8812 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8813
8814 // Truncates to mask types are handled differently
8815 if (VT.getVectorElementType() == MVT::i1)
8816 return lowerVectorMaskTruncLike(Op, DAG);
8817
8818 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8819 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8820 // truncate by one power of two at a time.
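// For example, a vXi64 -> vXi8 truncate is emitted as three narrowing steps:
// i64 -> i32 -> i16 -> i8.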
8821 MVT DstEltVT = VT.getVectorElementType();
8822
8823 SDValue Src = Op.getOperand(0);
8824 MVT SrcVT = Src.getSimpleValueType();
8825 MVT SrcEltVT = SrcVT.getVectorElementType();
8826
8827 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8828 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8829 "Unexpected vector truncate lowering");
8830
8831 MVT ContainerVT = SrcVT;
8832 SDValue Mask, VL;
8833 if (IsVPTrunc) {
8834 Mask = Op.getOperand(1);
8835 VL = Op.getOperand(2);
8836 }
8837 if (SrcVT.isFixedLengthVector()) {
8838 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8839 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8840 if (IsVPTrunc) {
8841 MVT MaskVT = getMaskTypeFor(ContainerVT);
8842 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8843 }
8844 }
8845
8846 SDValue Result = Src;
8847 if (!IsVPTrunc) {
8848 std::tie(Mask, VL) =
8849 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8850 }
8851
8852 unsigned NewOpc;
8853 if (Opc == ISD::TRUNCATE_SSAT_S)
8854 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8855 else if (Opc == ISD::TRUNCATE_USAT_U)
8856 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8857 else
8858 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
8859
8860 do {
8861 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8862 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8863 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
8864 } while (SrcEltVT != DstEltVT);
8865
8866 if (SrcVT.isFixedLengthVector())
8867 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8868
8869 return Result;
8870}
8871
8872SDValue
8873RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8874 SelectionDAG &DAG) const {
8875 SDLoc DL(Op);
8876 SDValue Chain = Op.getOperand(0);
8877 SDValue Src = Op.getOperand(1);
8878 MVT VT = Op.getSimpleValueType();
8879 MVT SrcVT = Src.getSimpleValueType();
8880 MVT ContainerVT = VT;
8881 if (VT.isFixedLengthVector()) {
8882 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8883 ContainerVT =
8884 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8885 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8886 }
8887
8888 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8889
8890 // RVV can only widen/truncate fp to types double/half the size of the source.
8891 if ((VT.getVectorElementType() == MVT::f64 &&
8892 (SrcVT.getVectorElementType() == MVT::f16 ||
8893 SrcVT.getVectorElementType() == MVT::bf16)) ||
8894 ((VT.getVectorElementType() == MVT::f16 ||
8895 VT.getVectorElementType() == MVT::bf16) &&
8896 SrcVT.getVectorElementType() == MVT::f64)) {
8897 // For double rounding, the intermediate rounding should be round-to-odd.
8898 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8899 ? RISCVISD::STRICT_FP_EXTEND_VL
8900 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8901 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8902 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8903 Chain, Src, Mask, VL);
8904 Chain = Src.getValue(1);
8905 }
8906
8907 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8908 ? RISCVISD::STRICT_FP_EXTEND_VL
8909 : RISCVISD::STRICT_FP_ROUND_VL;
8910 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8911 Chain, Src, Mask, VL);
8912 if (VT.isFixedLengthVector()) {
8913 // StrictFP operations have two result values. Their lowered result should
8914 // have the same result count.
8915 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8916 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8917 }
8918 return Res;
8919}
8920
8921SDValue
8922RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8923 SelectionDAG &DAG) const {
8924 bool IsVP =
8925 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8926 bool IsExtend =
8927 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8928 // RVV can only truncate fp to types half the size of the source. We
8929 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8930 // conversion instruction.
8931 SDLoc DL(Op);
8932 MVT VT = Op.getSimpleValueType();
8933
8934 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8935
8936 SDValue Src = Op.getOperand(0);
8937 MVT SrcVT = Src.getSimpleValueType();
8938
8939 bool IsDirectExtend =
8940 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
8941 (SrcVT.getVectorElementType() != MVT::f16 &&
8942 SrcVT.getVectorElementType() != MVT::bf16));
8943 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
8944 VT.getVectorElementType() != MVT::bf16) ||
8945 SrcVT.getVectorElementType() != MVT::f64);
8946
8947 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
8948
8949 // Prepare any fixed-length vector operands.
8950 MVT ContainerVT = VT;
8951 SDValue Mask, VL;
8952 if (IsVP) {
8953 Mask = Op.getOperand(1);
8954 VL = Op.getOperand(2);
8955 }
8956 if (VT.isFixedLengthVector()) {
8957 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8958 ContainerVT =
8959 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8960 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8961 if (IsVP) {
8962 MVT MaskVT = getMaskTypeFor(ContainerVT);
8963 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8964 }
8965 }
8966
8967 if (!IsVP)
8968 std::tie(Mask, VL) =
8969 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8970
8971 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
8972
8973 if (IsDirectConv) {
8974 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
8975 if (VT.isFixedLengthVector())
8976 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
8977 return Src;
8978 }
8979
8980 unsigned InterConvOpc =
8981 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
8982
8983 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8984 SDValue IntermediateConv =
8985 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
8986 SDValue Result =
8987 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
8988 if (VT.isFixedLengthVector())
8989 return convertFromScalableVector(VT, Result, DAG, Subtarget);
8990 return Result;
8991}
8992
8993// Given a scalable vector type and an index into it, returns the type for the
8994// smallest subvector that the index fits in. This can be used to reduce LMUL
8995// for operations like vslidedown.
8996//
8997// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
8998static std::optional<MVT>
8999getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
9000 const RISCVSubtarget &Subtarget) {
9001 assert(VecVT.isScalableVector());
9002 const unsigned EltSize = VecVT.getScalarSizeInBits();
9003 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
9004 const unsigned MinVLMAX = VectorBitsMin / EltSize;
9005 MVT SmallerVT;
9006 if (MaxIdx < MinVLMAX)
9007 SmallerVT = getLMUL1VT(VecVT);
9008 else if (MaxIdx < MinVLMAX * 2)
9009 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
9010 else if (MaxIdx < MinVLMAX * 4)
9011 SmallerVT = getLMUL1VT(VecVT)
9012 .getDoubleNumVectorElementsVT()
9013 .getDoubleNumVectorElementsVT();
9014 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
9015 return std::nullopt;
9016 return SmallerVT;
9017}
9018
9019// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
9020// first position of a vector, and that vector is slid up to the insert index.
9021// By limiting the active vector length to index+1 and merging with the
9022// original vector (with an undisturbed tail policy for elements >= VL), we
9023// achieve the desired result of leaving all elements untouched except the one
9024// at VL-1, which is replaced with the desired value.
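// Roughly, (insert_vector_elt vec, val, idx) becomes a scalar move of val into
// element 0 of a temporary (vmv.s.x / vfmv.s.f), followed by a vslideup of that
// temporary into vec at idx with VL = idx + 1 and a tail-undisturbed policy
// (tail-agnostic when idx is the last element of a fixed vector).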
9025SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
9026 SelectionDAG &DAG) const {
9027 SDLoc DL(Op);
9028 MVT VecVT = Op.getSimpleValueType();
9029 MVT XLenVT = Subtarget.getXLenVT();
9030 SDValue Vec = Op.getOperand(0);
9031 SDValue Val = Op.getOperand(1);
9032 MVT ValVT = Val.getSimpleValueType();
9033 SDValue Idx = Op.getOperand(2);
9034
9035 if (VecVT.getVectorElementType() == MVT::i1) {
9036 // FIXME: For now we just promote to an i8 vector and insert into that,
9037 // but this is probably not optimal.
9038 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9039 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9040 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
9041 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
9042 }
9043
9044 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9045 ValVT == MVT::bf16) {
9046 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9047 MVT IntVT = VecVT.changeTypeToInteger();
9048 SDValue IntInsert = DAG.getNode(
9049 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
9050 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
9051 return DAG.getBitcast(VecVT, IntInsert);
9052 }
9053
9054 MVT ContainerVT = VecVT;
9055 // If the operand is a fixed-length vector, convert to a scalable one.
9056 if (VecVT.isFixedLengthVector()) {
9057 ContainerVT = getContainerForFixedLengthVector(VecVT);
9058 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9059 }
9060
9061 // If we know the index we're going to insert at, we can shrink Vec so that
9062 // we're performing the scalar inserts and slideup on a smaller LMUL.
9063 MVT OrigContainerVT = ContainerVT;
9064 SDValue OrigVec = Vec;
9065 SDValue AlignedIdx;
9066 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
9067 const unsigned OrigIdx = IdxC->getZExtValue();
9068 // Do we know an upper bound on LMUL?
9069 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
9070 DL, DAG, Subtarget)) {
9071 ContainerVT = *ShrunkVT;
9072 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
9073 }
9074
9075 // If we're compiling for an exact VLEN value, we can always perform
9076 // the insert in m1 as we can determine the register corresponding to
9077 // the index in the register group.
9078 const MVT M1VT = getLMUL1VT(ContainerVT);
9079 if (auto VLEN = Subtarget.getRealVLen();
9080 VLEN && ContainerVT.bitsGT(M1VT)) {
9081 EVT ElemVT = VecVT.getVectorElementType();
9082 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9083 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9084 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9085 unsigned ExtractIdx =
9086 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9087 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
9088 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9089 ContainerVT = M1VT;
9090 }
9091
9092 if (AlignedIdx)
9093 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9094 AlignedIdx);
9095 }
9096
9097 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9098 // Even i64-element vectors on RV32 can be lowered without scalar
9099 // legalization if the most-significant 32 bits of the value are not affected
9100 // by the sign-extension of the lower 32 bits.
9101 // TODO: We could also catch sign extensions of a 32-bit value.
9102 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9103 const auto *CVal = cast<ConstantSDNode>(Val);
9104 if (isInt<32>(CVal->getSExtValue())) {
9105 IsLegalInsert = true;
9106 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
9107 }
9108 }
9109
9110 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9111
9112 SDValue ValInVec;
9113
9114 if (IsLegalInsert) {
9115 unsigned Opc =
9116 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
9117 if (isNullConstant(Idx)) {
9118 if (!VecVT.isFloatingPoint())
9119 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
9120 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
9121
9122 if (AlignedIdx)
9123 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9124 Vec, AlignedIdx);
9125 if (!VecVT.isFixedLengthVector())
9126 return Vec;
9127 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
9128 }
9129 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
9130 } else {
9131 // On RV32, i64-element vectors must be specially handled to place the
9132 // value at element 0, by using two vslide1down instructions in sequence on
9133 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
9134 // this.
9135 SDValue ValLo, ValHi;
9136 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
9137 MVT I32ContainerVT =
9138 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
9139 SDValue I32Mask =
9140 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
9141 // Limit the active VL to two.
9142 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
9143 // If the Idx is 0 we can insert directly into the vector.
9144 if (isNullConstant(Idx)) {
9145 // First slide in the lo value, then the hi value above it. We use slide1down
9146 // to avoid the register group overlap constraint of vslide1up.
9147 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9148 Vec, Vec, ValLo, I32Mask, InsertI64VL);
9149 // If the source vector is undef don't pass along the tail elements from
9150 // the previous slide1down.
9151 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
9152 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9153 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
9154 // Bitcast back to the right container type.
9155 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9156
9157 if (AlignedIdx)
9158 ValInVec =
9159 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9160 ValInVec, AlignedIdx);
9161 if (!VecVT.isFixedLengthVector())
9162 return ValInVec;
9163 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
9164 }
9165
9166 // First slide in the lo value, then the hi value above it. We use slide1down
9167 // to avoid the register group overlap constraint of vslide1up.
9168 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9169 DAG.getUNDEF(I32ContainerVT),
9170 DAG.getUNDEF(I32ContainerVT), ValLo,
9171 I32Mask, InsertI64VL);
9172 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9173 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
9174 I32Mask, InsertI64VL);
9175 // Bitcast back to the right container type.
9176 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9177 }
9178
9179 // Now that the value is in a vector, slide it into position.
9180 SDValue InsertVL =
9181 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
9182
9183 // Use tail agnostic policy if Idx is the last index of Vec.
9184 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9185 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
9186 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
9187 Policy = RISCVII::TAIL_AGNOSTIC;
9188 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
9189 Idx, Mask, InsertVL, Policy);
9190
9191 if (AlignedIdx)
9192 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9193 Slideup, AlignedIdx);
9194 if (!VecVT.isFixedLengthVector())
9195 return Slideup;
9196 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
9197}
9198
9199// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
9200// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
9201// types this is done using VMV_X_S to allow us to glean information about the
9202// sign bits of the result.
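// For example, extracting element 2 of a vXi32 becomes
// (vmv.x.s (vslidedown vec, 2)) with the slide's VL limited to 1.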
9203SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
9204 SelectionDAG &DAG) const {
9205 SDLoc DL(Op);
9206 SDValue Idx = Op.getOperand(1);
9207 SDValue Vec = Op.getOperand(0);
9208 EVT EltVT = Op.getValueType();
9209 MVT VecVT = Vec.getSimpleValueType();
9210 MVT XLenVT = Subtarget.getXLenVT();
9211
9212 if (VecVT.getVectorElementType() == MVT::i1) {
9213 // Use vfirst.m to extract the first bit.
9214 if (isNullConstant(Idx)) {
9215 MVT ContainerVT = VecVT;
9216 if (VecVT.isFixedLengthVector()) {
9217 ContainerVT = getContainerForFixedLengthVector(VecVT);
9218 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9219 }
9220 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9221 SDValue Vfirst =
9222 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
9223 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
9224 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9225 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9226 }
9227 if (VecVT.isFixedLengthVector()) {
9228 unsigned NumElts = VecVT.getVectorNumElements();
9229 if (NumElts >= 8) {
9230 MVT WideEltVT;
9231 unsigned WidenVecLen;
9232 SDValue ExtractElementIdx;
9233 SDValue ExtractBitIdx;
9234 unsigned MaxEEW = Subtarget.getELen();
9235 MVT LargestEltVT = MVT::getIntegerVT(
9236 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
9237 if (NumElts <= LargestEltVT.getSizeInBits()) {
9238 assert(isPowerOf2_32(NumElts) &&
9239 "the number of elements should be power of 2");
9240 WideEltVT = MVT::getIntegerVT(NumElts);
9241 WidenVecLen = 1;
9242 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
9243 ExtractBitIdx = Idx;
9244 } else {
9245 WideEltVT = LargestEltVT;
9246 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
9247 // extract element index = index / element width
9248 ExtractElementIdx = DAG.getNode(
9249 ISD::SRL, DL, XLenVT, Idx,
9250 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
9251 // mask bit index = index % element width
9252 ExtractBitIdx = DAG.getNode(
9253 ISD::AND, DL, XLenVT, Idx,
9254 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
9255 }
9256 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
9257 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
9258 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
9259 Vec, ExtractElementIdx);
9260 // Extract the bit from GPR.
9261 SDValue ShiftRight =
9262 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
9263 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
9264 DAG.getConstant(1, DL, XLenVT));
9265 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9266 }
9267 }
9268 // Otherwise, promote to an i8 vector and extract from that.
9269 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9270 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9271 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
9272 }
9273
9274 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9275 EltVT == MVT::bf16) {
9276 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
9277 MVT IntVT = VecVT.changeTypeToInteger();
9278 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
9279 SDValue IntExtract =
9280 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
9281 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
9282 }
9283
9284 // If this is a fixed vector, we need to convert it to a scalable vector.
9285 MVT ContainerVT = VecVT;
9286 if (VecVT.isFixedLengthVector()) {
9287 ContainerVT = getContainerForFixedLengthVector(VecVT);
9288 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9289 }
9290
9291 // If we're compiling for an exact VLEN value and we have a known
9292 // constant index, we can always perform the extract in m1 (or
9293 // smaller) as we can determine the register corresponding to
9294 // the index in the register group.
9295 const auto VLen = Subtarget.getRealVLen();
9296 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9297 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9298 MVT M1VT = getLMUL1VT(ContainerVT);
9299 unsigned OrigIdx = IdxC->getZExtValue();
9300 EVT ElemVT = VecVT.getVectorElementType();
9301 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9302 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9303 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9304 unsigned ExtractIdx =
9305 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9306 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
9307 DAG.getVectorIdxConstant(ExtractIdx, DL));
9308 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9309 ContainerVT = M1VT;
9310 }
9311
9312 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9313 // contains our index.
9314 std::optional<uint64_t> MaxIdx;
9315 if (VecVT.isFixedLengthVector())
9316 MaxIdx = VecVT.getVectorNumElements() - 1;
9317 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9318 MaxIdx = IdxC->getZExtValue();
9319 if (MaxIdx) {
9320 if (auto SmallerVT =
9321 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
9322 ContainerVT = *SmallerVT;
9323 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9324 DAG.getConstant(0, DL, XLenVT));
9325 }
9326 }
9327
9328 // If after narrowing, the required slide is still greater than LMUL2,
9329 // fallback to generic expansion and go through the stack. This is done
9330 // for a subtle reason: extracting *all* elements out of a vector is
9331 // widely expected to be linear in vector size, but because vslidedown
9332 // is linear in LMUL, performing N extracts using vslidedown becomes
9333 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
9334 // seems to have the same problem (the store is linear in LMUL), but the
9335 // generic expansion *memoizes* the store, and thus for many extracts of
9336 // the same vector we end up with one store and a bunch of loads.
9337 // TODO: We don't have the same code for insert_vector_elt because we
9338 // have BUILD_VECTOR and handle the degenerate case there. Should we
9339 // consider adding an inverse BUILD_VECTOR node?
9340 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9341 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9342 return SDValue();
9343
9344 // If the index is 0, the vector is already in the right position.
9345 if (!isNullConstant(Idx)) {
9346 // Use a VL of 1 to avoid processing more elements than we need.
9347 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
9348 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9349 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
9350 }
9351
9352 if (!EltVT.isInteger()) {
9353 // Floating-point extracts are handled in TableGen.
9354 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
9355 DAG.getVectorIdxConstant(0, DL));
9356 }
9357
9358 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
9359 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
9360}
9361
9362// Some RVV intrinsics may claim that they want an integer operand to be
9363// promoted or expanded.
9364 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
9365 const RISCVSubtarget &Subtarget) {
9366 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
9367 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
9368 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
9369 "Unexpected opcode");
9370
9371 if (!Subtarget.hasVInstructions())
9372 return SDValue();
9373
9374 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9375 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9376 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9377
9378 SDLoc DL(Op);
9380 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9381 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9382 if (!II || !II->hasScalarOperand())
9383 return SDValue();
9384
9385 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
9386 assert(SplatOp < Op.getNumOperands());
9388 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
9389 SDValue &ScalarOp = Operands[SplatOp];
9390 MVT OpVT = ScalarOp.getSimpleValueType();
9391 MVT XLenVT = Subtarget.getXLenVT();
9392
9393 // If this isn't a scalar, or its type is XLenVT we're done.
9394 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9395 return SDValue();
9396
9397 // Simplest case is that the operand needs to be promoted to XLenVT.
9398 if (OpVT.bitsLT(XLenVT)) {
9399 // If the operand is a constant, sign extend to increase our chances
9400 // of being able to use a .vi instruction. ANY_EXTEND would become a
9401 // zero extend and the simm5 check in isel would fail.
9402 // FIXME: Should we ignore the upper bits in isel instead?
9403 unsigned ExtOpc =
9404 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9405 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9406 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9407 }
9408
9409 // Use the previous operand to get the vXi64 VT. The result might be a mask
9410 // VT for compares. Using the previous operand assumes that the previous
9411 // operand will never have a smaller element size than a scalar operand and
9412 // that a widening operation never uses SEW=64.
9413 // NOTE: If this fails the below assert, we can probably just find the
9414 // element count from any operand or result and use it to construct the VT.
9415 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
9416 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
9417
9418 // The more complex case is when the scalar is larger than XLenVT.
9419 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9420 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
9421
9422 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
9423 // instruction to sign-extend since SEW>XLEN.
9424 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9425 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
9426 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9427 }
9428
9429 switch (IntNo) {
9430 case Intrinsic::riscv_vslide1up:
9431 case Intrinsic::riscv_vslide1down:
9432 case Intrinsic::riscv_vslide1up_mask:
9433 case Intrinsic::riscv_vslide1down_mask: {
9434 // We need to special case these when the scalar is larger than XLen.
9435 unsigned NumOps = Op.getNumOperands();
9436 bool IsMasked = NumOps == 7;
9437
9438 // Convert the vector source to the equivalent nxvXi32 vector.
9439 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9440 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
9441 SDValue ScalarLo, ScalarHi;
9442 std::tie(ScalarLo, ScalarHi) =
9443 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
9444
9445 // Double the VL since we halved SEW.
9446 SDValue AVL = getVLOperand(Op);
9447 SDValue I32VL;
9448
9449 // Optimize for constant AVL
9450 if (isa<ConstantSDNode>(AVL)) {
9451 const auto [MinVLMAX, MaxVLMAX] =
9452 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
9453
9454 uint64_t AVLInt = AVL->getAsZExtVal();
9455 if (AVLInt <= MinVLMAX) {
9456 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
9457 } else if (AVLInt >= 2 * MaxVLMAX) {
9458 // Just set vl to VLMAX in this situation
9459 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9460 } else {
9461 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
9462 // is related to the hardware implementation.
9463 // So let the following code handle it.
9464 }
9465 }
9466 if (!I32VL) {
9467 RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
9468 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
9469 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9470 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
9471 SDValue SETVL =
9472 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
9473 // Use the vsetvli instruction to get the actually-used length, which is
9474 // related to the hardware implementation.
9475 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
9476 SEW, LMUL);
9477 I32VL =
9478 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
9479 }
9480
9481 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
9482
9483 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
9484 // instructions.
9485 SDValue Passthru;
9486 if (IsMasked)
9487 Passthru = DAG.getUNDEF(I32VT);
9488 else
9489 Passthru = DAG.getBitcast(I32VT, Operands[1]);
9490
9491 if (IntNo == Intrinsic::riscv_vslide1up ||
9492 IntNo == Intrinsic::riscv_vslide1up_mask) {
9493 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9494 ScalarHi, I32Mask, I32VL);
9495 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9496 ScalarLo, I32Mask, I32VL);
9497 } else {
9498 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9499 ScalarLo, I32Mask, I32VL);
9500 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9501 ScalarHi, I32Mask, I32VL);
9502 }
9503
9504 // Convert back to nxvXi64.
9505 Vec = DAG.getBitcast(VT, Vec);
9506
9507 if (!IsMasked)
9508 return Vec;
9509 // Apply mask after the operation.
9510 SDValue Mask = Operands[NumOps - 3];
9511 SDValue MaskedOff = Operands[1];
9512 // Assume Policy operand is the last operand.
9513 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
9514 // We don't need to select maskedoff if it's undef.
9515 if (MaskedOff.isUndef())
9516 return Vec;
9517 // TAMU
9518 if (Policy == RISCVII::TAIL_AGNOSTIC)
9519 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9520 DAG.getUNDEF(VT), AVL);
9521 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
9522 // It's fine because vmerge does not care about the mask policy.
9523 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9524 MaskedOff, AVL);
9525 }
9526 }
9527
9528 // We need to convert the scalar to a splat vector.
9529 SDValue VL = getVLOperand(Op);
9530 assert(VL.getValueType() == XLenVT);
9531 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
9532 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9533}
9534
9535// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9536// scalable vector llvm.get.vector.length for now.
9537//
9538// We need to convert from a scalable VF to a vsetvli with VLMax equal to
9539// (vscale * VF). The vscale and VF are independent of element width. We use
9540// SEW=8 for the vsetvli because it is the only element width that supports all
9541 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
9542 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
9543// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9544// SEW and LMUL are better for the surrounding vector instructions.
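// For example, a request with VF=4 uses SEW=8 and LMUL=1/2: with
// RVVBitsPerBlock = 64 and vscale = VLEN/64, that gives
// VLMax = VLEN/16 = vscale * 4.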
9545 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
9546 const RISCVSubtarget &Subtarget) {
9547 MVT XLenVT = Subtarget.getXLenVT();
9548
9549 // The smallest LMUL is only valid for the smallest element width.
9550 const unsigned ElementWidth = 8;
9551
9552 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
9553 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
9554 // We don't support VF==1 with ELEN==32.
9555 [[maybe_unused]] unsigned MinVF =
9556 RISCV::RVVBitsPerBlock / Subtarget.getELen();
9557
9558 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
9559 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
9560 "Unexpected VF");
9561
9562 bool Fractional = VF < LMul1VF;
9563 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9564 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9565 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
9566
9567 SDLoc DL(N);
9568
9569 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
9570 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
9571
9572 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
9573
9574 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
9575 SDValue Res =
9576 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
9577 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
9578}
9579
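// Lower llvm.experimental.cttz.elts on a mask to vfirst.m: vfirst returns the
// index of the first set bit, or -1 if none is set. For the variant where an
// all-zero input is not poison, the -1 result is converted to the element
// count below.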
9580 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
9581 const RISCVSubtarget &Subtarget) {
9582 SDValue Op0 = N->getOperand(1);
9583 MVT OpVT = Op0.getSimpleValueType();
9584 MVT ContainerVT = OpVT;
9585 if (OpVT.isFixedLengthVector()) {
9586 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9587 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9588 }
9589 MVT XLenVT = Subtarget.getXLenVT();
9590 SDLoc DL(N);
9591 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9592 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9593 if (isOneConstant(N->getOperand(2)))
9594 return Res;
9595
9596 // Convert -1 to VL.
9597 SDValue Setcc =
9598 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9599 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9600 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9601}
9602
9603static inline void promoteVCIXScalar(const SDValue &Op,
9604 SmallVectorImpl<SDValue> &Operands,
9605 SelectionDAG &DAG) {
9606 const RISCVSubtarget &Subtarget =
9607 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9608
9609 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9610 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9611 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9612 SDLoc DL(Op);
9613
9614 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9615 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9616 if (!II || !II->hasScalarOperand())
9617 return;
9618
9619 unsigned SplatOp = II->ScalarOperand + 1;
9620 assert(SplatOp < Op.getNumOperands());
9621
9622 SDValue &ScalarOp = Operands[SplatOp];
9623 MVT OpVT = ScalarOp.getSimpleValueType();
9624 MVT XLenVT = Subtarget.getXLenVT();
9625
9626 // The code below is partially copied from lowerVectorIntrinsicScalars.
9627 // If this isn't a scalar, or its type is XLenVT we're done.
9628 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9629 return;
9630
9631 // Manually emit promote operation for scalar operation.
9632 if (OpVT.bitsLT(XLenVT)) {
9633 unsigned ExtOpc =
9634 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9635 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9636 }
9637}
9638
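// Prepare VCIX intrinsic operands for the custom SDNodes: promote any
// sub-XLEN scalar operand, bitcast floating-point vectors to same-width
// integer vectors, and convert fixed-length vectors to their scalable
// containers.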
9639static void processVCIXOperands(SDValue &OrigOp,
9640 SmallVectorImpl<SDValue> &Operands,
9641 SelectionDAG &DAG) {
9642 promoteVCIXScalar(OrigOp, Operands, DAG);
9643 const RISCVSubtarget &Subtarget =
9644 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9645 for (SDValue &V : Operands) {
9646 EVT ValType = V.getValueType();
9647 if (ValType.isVector() && ValType.isFloatingPoint()) {
9648 MVT InterimIVT =
9649 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9650 ValType.getVectorElementCount());
9651 V = DAG.getBitcast(InterimIVT, V);
9652 }
9653 if (ValType.isFixedLengthVector()) {
9654 MVT OpContainerVT = getContainerForFixedLengthVector(
9655 DAG, V.getSimpleValueType(), Subtarget);
9656 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9657 }
9658 }
9659}
9660
9661// LMUL * VLEN should be greater than or equal to EGS * SEW
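// For example, with VLEN >= 128, a 4-element group (EGS=4) of SEW=32 elements
// needs LMUL * VLEN >= 128, which an LMUL=1 (or larger) vector type satisfies.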
9662static inline bool isValidEGW(int EGS, EVT VT,
9663 const RISCVSubtarget &Subtarget) {
9664 return (Subtarget.getRealMinVLen() *
9665 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
9666 EGS * VT.getScalarSizeInBits();
9667}
9668
9669SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9670 SelectionDAG &DAG) const {
9671 unsigned IntNo = Op.getConstantOperandVal(0);
9672 SDLoc DL(Op);
9673 MVT XLenVT = Subtarget.getXLenVT();
9674
9675 switch (IntNo) {
9676 default:
9677 break; // Don't custom lower most intrinsics.
9678 case Intrinsic::riscv_tuple_insert: {
9679 SDValue Vec = Op.getOperand(1);
9680 SDValue SubVec = Op.getOperand(2);
9681 SDValue Index = Op.getOperand(3);
9682
9683 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
9684 SubVec, Index);
9685 }
9686 case Intrinsic::riscv_tuple_extract: {
9687 SDValue Vec = Op.getOperand(1);
9688 SDValue Index = Op.getOperand(2);
9689
9690 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
9691 Index);
9692 }
9693 case Intrinsic::thread_pointer: {
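    // The thread pointer lives in x4 (tp).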
9694 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9695 return DAG.getRegister(RISCV::X4, PtrVT);
9696 }
9697 case Intrinsic::riscv_orc_b:
9698 case Intrinsic::riscv_brev8:
9699 case Intrinsic::riscv_sha256sig0:
9700 case Intrinsic::riscv_sha256sig1:
9701 case Intrinsic::riscv_sha256sum0:
9702 case Intrinsic::riscv_sha256sum1:
9703 case Intrinsic::riscv_sm3p0:
9704 case Intrinsic::riscv_sm3p1: {
9705 unsigned Opc;
9706 switch (IntNo) {
9707 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9708 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9709 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9710 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9711 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9712 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9713 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9714 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9715 }
9716
9717 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9718 }
9719 case Intrinsic::riscv_sm4ks:
9720 case Intrinsic::riscv_sm4ed: {
9721 unsigned Opc =
9722 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9723
9724 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9725 Op.getOperand(3));
9726 }
9727 case Intrinsic::riscv_zip:
9728 case Intrinsic::riscv_unzip: {
9729 unsigned Opc =
9730 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9731 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9732 }
9733 case Intrinsic::riscv_mopr:
9734 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9735 Op.getOperand(2));
9736
9737 case Intrinsic::riscv_moprr: {
9738 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9739 Op.getOperand(2), Op.getOperand(3));
9740 }
9741 case Intrinsic::riscv_clmul:
9742 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9743 Op.getOperand(2));
9744 case Intrinsic::riscv_clmulh:
9745 case Intrinsic::riscv_clmulr: {
9746 unsigned Opc =
9747 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9748 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9749 }
9750 case Intrinsic::experimental_get_vector_length:
9751 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9752 case Intrinsic::experimental_cttz_elts:
9753 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9754 case Intrinsic::riscv_vmv_x_s: {
9755 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9756 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9757 }
9758 case Intrinsic::riscv_vfmv_f_s:
9759 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9760 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9761 case Intrinsic::riscv_vmv_v_x:
9762 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9763 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9764 Subtarget);
9765 case Intrinsic::riscv_vfmv_v_f:
9766 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9767 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9768 case Intrinsic::riscv_vmv_s_x: {
9769 SDValue Scalar = Op.getOperand(2);
9770
9771 if (Scalar.getValueType().bitsLE(XLenVT)) {
9772 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9773 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9774 Op.getOperand(1), Scalar, Op.getOperand(3));
9775 }
9776
9777 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9778
9779 // This is an i64 value that lives in two scalar registers. We have to
9780 // insert this in a convoluted way. First we build a vXi64 splat containing
9781 // the two values that we assemble using some bit math. Next we'll use
9782 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9783 // to merge element 0 from our splat into the source vector.
9784 // FIXME: This is probably not the best way to do this, but it is
9785 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9786 // point.
9787 // sw lo, (a0)
9788 // sw hi, 4(a0)
9789 // vlse vX, (a0)
9790 //
9791 // vid.v vVid
9792 // vmseq.vx mMask, vVid, 0
9793 // vmerge.vvm vDest, vSrc, vVal, mMask
9794 MVT VT = Op.getSimpleValueType();
9795 SDValue Vec = Op.getOperand(1);
9796 SDValue VL = getVLOperand(Op);
9797
9798 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9799 if (Op.getOperand(1).isUndef())
9800 return SplattedVal;
9801 SDValue SplattedIdx =
9802 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9803 DAG.getConstant(0, DL, MVT::i32), VL);
9804
9805 MVT MaskVT = getMaskTypeFor(VT);
9806 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9807 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9808 SDValue SelectCond =
9809 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9810 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9811 DAG.getUNDEF(MaskVT), Mask, VL});
9812 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9813 Vec, DAG.getUNDEF(VT), VL);
9814 }
9815 case Intrinsic::riscv_vfmv_s_f:
9816 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9817 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9818 // EGS * EEW >= 128 bits
9819 case Intrinsic::riscv_vaesdf_vv:
9820 case Intrinsic::riscv_vaesdf_vs:
9821 case Intrinsic::riscv_vaesdm_vv:
9822 case Intrinsic::riscv_vaesdm_vs:
9823 case Intrinsic::riscv_vaesef_vv:
9824 case Intrinsic::riscv_vaesef_vs:
9825 case Intrinsic::riscv_vaesem_vv:
9826 case Intrinsic::riscv_vaesem_vs:
9827 case Intrinsic::riscv_vaeskf1:
9828 case Intrinsic::riscv_vaeskf2:
9829 case Intrinsic::riscv_vaesz_vs:
9830 case Intrinsic::riscv_vsm4k:
9831 case Intrinsic::riscv_vsm4r_vv:
9832 case Intrinsic::riscv_vsm4r_vs: {
9833 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9834 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9835 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9836 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9837 return Op;
9838 }
9839 // EGS * EEW >= 256 bits
9840 case Intrinsic::riscv_vsm3c:
9841 case Intrinsic::riscv_vsm3me: {
9842 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9843 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9844 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9845 return Op;
9846 }
9847 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9848 case Intrinsic::riscv_vsha2ch:
9849 case Intrinsic::riscv_vsha2cl:
9850 case Intrinsic::riscv_vsha2ms: {
9851 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9852 !Subtarget.hasStdExtZvknhb())
9853 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9854 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9855 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9856 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9857 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9858 return Op;
9859 }
9860 case Intrinsic::riscv_sf_vc_v_x:
9861 case Intrinsic::riscv_sf_vc_v_i:
9862 case Intrinsic::riscv_sf_vc_v_xv:
9863 case Intrinsic::riscv_sf_vc_v_iv:
9864 case Intrinsic::riscv_sf_vc_v_vv:
9865 case Intrinsic::riscv_sf_vc_v_fv:
9866 case Intrinsic::riscv_sf_vc_v_xvv:
9867 case Intrinsic::riscv_sf_vc_v_ivv:
9868 case Intrinsic::riscv_sf_vc_v_vvv:
9869 case Intrinsic::riscv_sf_vc_v_fvv:
9870 case Intrinsic::riscv_sf_vc_v_xvw:
9871 case Intrinsic::riscv_sf_vc_v_ivw:
9872 case Intrinsic::riscv_sf_vc_v_vvw:
9873 case Intrinsic::riscv_sf_vc_v_fvw: {
9874 MVT VT = Op.getSimpleValueType();
9875
9876 SmallVector<SDValue> Operands{Op->op_values()};
9877 processVCIXOperands(Op, Operands, DAG);
9878
9879 MVT RetVT = VT;
9880 if (VT.isFixedLengthVector())
9881 RetVT = getContainerForFixedLengthVector(RetVT);
9882 else if (VT.isFloatingPoint())
9883 RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
9884 RetVT.getVectorElementCount());
9885
9886 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9887
9888 if (VT.isFixedLengthVector())
9889 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9890 else if (VT.isFloatingPoint())
9891 NewNode = DAG.getBitcast(VT, NewNode);
9892
9893 if (Op == NewNode)
9894 break;
9895
9896 return NewNode;
9897 }
9898 }
9899
9900 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9901}
9902
9903static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9904 unsigned Type) {
9905 SDLoc DL(Op);
9906 SmallVector<SDValue> Operands{Op->op_values()};
9907 Operands.erase(Operands.begin() + 1);
9908
9909 const RISCVSubtarget &Subtarget =
9910 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9911 MVT VT = Op.getSimpleValueType();
9912 MVT RetVT = VT;
9913 MVT FloatVT = VT;
9914
9915 if (VT.isFloatingPoint()) {
9916 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9917 VT.getVectorElementCount());
9918 FloatVT = RetVT;
9919 }
9920 if (VT.isFixedLengthVector())
9921 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9922 Subtarget);
9923
9924 processVCIXOperands(Op, Operands, DAG);
9925
9926 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9927 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9928 SDValue Chain = NewNode.getValue(1);
9929
9930 if (VT.isFixedLengthVector())
9931 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9932 if (VT.isFloatingPoint())
9933 NewNode = DAG.getBitcast(VT, NewNode);
9934
9935 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9936
9937 return NewNode;
9938}
9939
9940static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
9941 unsigned Type) {
9942 SmallVector<SDValue> Operands{Op->op_values()};
9943 Operands.erase(Operands.begin() + 1);
9944 processVCIXOperands(Op, Operands, DAG);
9945
9946 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
9947}
9948
9949SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
9950 SelectionDAG &DAG) const {
9951 unsigned IntNo = Op.getConstantOperandVal(1);
9952 switch (IntNo) {
9953 default:
9954 break;
9955 case Intrinsic::riscv_seg2_load:
9956 case Intrinsic::riscv_seg3_load:
9957 case Intrinsic::riscv_seg4_load:
9958 case Intrinsic::riscv_seg5_load:
9959 case Intrinsic::riscv_seg6_load:
9960 case Intrinsic::riscv_seg7_load:
9961 case Intrinsic::riscv_seg8_load: {
9962 SDLoc DL(Op);
9963 static const Intrinsic::ID VlsegInts[7] = {
9964 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
9965 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
9966 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
9967 Intrinsic::riscv_vlseg8};
9968 unsigned NF = Op->getNumValues() - 1;
9969 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
9970 MVT XLenVT = Subtarget.getXLenVT();
9971 MVT VT = Op->getSimpleValueType(0);
9972 MVT ContainerVT = getContainerForFixedLengthVector(VT);
9973 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
9974 ContainerVT.getScalarSizeInBits();
9975 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
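    // The vlsegN intrinsic packs all NF fields into one RISC-V vector tuple;
    // each field is extracted and converted back to a fixed-length vector below.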
9976
9977 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
9978 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
9979 auto *Load = cast<MemIntrinsicSDNode>(Op);
9980
9981 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
9982 SDValue Ops[] = {
9983 Load->getChain(),
9984 IntID,
9985 DAG.getUNDEF(VecTupTy),
9986 Op.getOperand(2),
9987 VL,
9988 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
9989 SDValue Result =
9990 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
9991 Load->getMemoryVT(), Load->getMemOperand());
9992 SmallVector<SDValue, 9> Results;
9993 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
9994 SDValue SubVec =
9995 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
9996 Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
9997 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
9998 }
9999 Results.push_back(Result.getValue(1));
10000 return DAG.getMergeValues(Results, DL);
10001 }
10002 case Intrinsic::riscv_sf_vc_v_x_se:
10003 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
10004 case Intrinsic::riscv_sf_vc_v_i_se:
10005 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
10006 case Intrinsic::riscv_sf_vc_v_xv_se:
10007 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
10008 case Intrinsic::riscv_sf_vc_v_iv_se:
10009 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
10010 case Intrinsic::riscv_sf_vc_v_vv_se:
10011 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
10012 case Intrinsic::riscv_sf_vc_v_fv_se:
10013 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
10014 case Intrinsic::riscv_sf_vc_v_xvv_se:
10015 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
10016 case Intrinsic::riscv_sf_vc_v_ivv_se:
10017 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
10018 case Intrinsic::riscv_sf_vc_v_vvv_se:
10019 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
10020 case Intrinsic::riscv_sf_vc_v_fvv_se:
10021 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
10022 case Intrinsic::riscv_sf_vc_v_xvw_se:
10023 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
10024 case Intrinsic::riscv_sf_vc_v_ivw_se:
10025 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
10026 case Intrinsic::riscv_sf_vc_v_vvw_se:
10027 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
10028 case Intrinsic::riscv_sf_vc_v_fvw_se:
10029 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
10030 }
10031
10032 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10033}
10034
10035SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10036 SelectionDAG &DAG) const {
10037 unsigned IntNo = Op.getConstantOperandVal(1);
10038 switch (IntNo) {
10039 default:
10040 break;
10041 case Intrinsic::riscv_seg2_store:
10042 case Intrinsic::riscv_seg3_store:
10043 case Intrinsic::riscv_seg4_store:
10044 case Intrinsic::riscv_seg5_store:
10045 case Intrinsic::riscv_seg6_store:
10046 case Intrinsic::riscv_seg7_store:
10047 case Intrinsic::riscv_seg8_store: {
10048 SDLoc DL(Op);
10049 static const Intrinsic::ID VssegInts[] = {
10050 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
10051 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
10052 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
10053 Intrinsic::riscv_vsseg8};
10054 // Operands are (chain, int_id, vec*, ptr, vl)
10055 unsigned NF = Op->getNumOperands() - 4;
10056 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10057 MVT XLenVT = Subtarget.getXLenVT();
10058 MVT VT = Op->getOperand(2).getSimpleValueType();
10059 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10060 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10061 ContainerVT.getScalarSizeInBits();
10062 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
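    // Pack the NF fixed-length sources into one vector tuple via TUPLE_INSERT,
    // then emit the corresponding vssegN store on that tuple.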
10063
10064 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10065 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
10066 SDValue Ptr = Op->getOperand(NF + 2);
10067
10068 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
10069
10070 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
10071 for (unsigned i = 0; i < NF; i++)
10072 StoredVal = DAG.getNode(
10073 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
10074 convertToScalableVector(
10075 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
10076 DAG.getVectorIdxConstant(i, DL));
10077
10078 SDValue Ops[] = {
10079 FixedIntrinsic->getChain(),
10080 IntID,
10081 StoredVal,
10082 Ptr,
10083 VL,
10084 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10085
10086 return DAG.getMemIntrinsicNode(
10087 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
10088 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
10089 }
10090 case Intrinsic::riscv_sf_vc_xv_se:
10091 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
10092 case Intrinsic::riscv_sf_vc_iv_se:
10093 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
10094 case Intrinsic::riscv_sf_vc_vv_se:
10095 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
10096 case Intrinsic::riscv_sf_vc_fv_se:
10097 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
10098 case Intrinsic::riscv_sf_vc_xvv_se:
10099 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
10100 case Intrinsic::riscv_sf_vc_ivv_se:
10101 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
10102 case Intrinsic::riscv_sf_vc_vvv_se:
10103 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
10104 case Intrinsic::riscv_sf_vc_fvv_se:
10105 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
10106 case Intrinsic::riscv_sf_vc_xvw_se:
10107 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
10108 case Intrinsic::riscv_sf_vc_ivw_se:
10109 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
10110 case Intrinsic::riscv_sf_vc_vvw_se:
10111 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
10112 case Intrinsic::riscv_sf_vc_fvw_se:
10113 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
10114 }
10115
10116 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10117}
10118
10119static unsigned getRVVReductionOp(unsigned ISDOpcode) {
10120 switch (ISDOpcode) {
10121 default:
10122 llvm_unreachable("Unhandled reduction");
10123 case ISD::VP_REDUCE_ADD:
10124 case ISD::VECREDUCE_ADD:
10125 return RISCVISD::VECREDUCE_ADD_VL;
10126 case ISD::VP_REDUCE_UMAX:
10127 case ISD::VECREDUCE_UMAX:
10128 return RISCVISD::VECREDUCE_UMAX_VL;
10129 case ISD::VP_REDUCE_SMAX:
10130 case ISD::VECREDUCE_SMAX:
10131 return RISCVISD::VECREDUCE_SMAX_VL;
10132 case ISD::VP_REDUCE_UMIN:
10133 case ISD::VECREDUCE_UMIN:
10134 return RISCVISD::VECREDUCE_UMIN_VL;
10135 case ISD::VP_REDUCE_SMIN:
10136 case ISD::VECREDUCE_SMIN:
10137 return RISCVISD::VECREDUCE_SMIN_VL;
10138 case ISD::VP_REDUCE_AND:
10139 case ISD::VECREDUCE_AND:
10140 return RISCVISD::VECREDUCE_AND_VL;
10141 case ISD::VP_REDUCE_OR:
10142 case ISD::VECREDUCE_OR:
10143 return RISCVISD::VECREDUCE_OR_VL;
10144 case ISD::VP_REDUCE_XOR:
10145 case ISD::VECREDUCE_XOR:
10146 return RISCVISD::VECREDUCE_XOR_VL;
10147 case ISD::VP_REDUCE_FADD:
10148 return RISCVISD::VECREDUCE_FADD_VL;
10149 case ISD::VP_REDUCE_SEQ_FADD:
10150 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
10151 case ISD::VP_REDUCE_FMAX:
10152 case ISD::VP_REDUCE_FMAXIMUM:
10153 return RISCVISD::VECREDUCE_FMAX_VL;
10154 case ISD::VP_REDUCE_FMIN:
10155 case ISD::VP_REDUCE_FMINIMUM:
10156 return RISCVISD::VECREDUCE_FMIN_VL;
10157 }
10158
10159}
10160
10161SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
10162 SelectionDAG &DAG,
10163 bool IsVP) const {
10164 SDLoc DL(Op);
10165 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
10166 MVT VecVT = Vec.getSimpleValueType();
10167 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
10168 Op.getOpcode() == ISD::VECREDUCE_OR ||
10169 Op.getOpcode() == ISD::VECREDUCE_XOR ||
10170 Op.getOpcode() == ISD::VP_REDUCE_AND ||
10171 Op.getOpcode() == ISD::VP_REDUCE_OR ||
10172 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
10173 "Unexpected reduction lowering");
10174
10175 MVT XLenVT = Subtarget.getXLenVT();
10176
10177 MVT ContainerVT = VecVT;
10178 if (VecVT.isFixedLengthVector()) {
10179 ContainerVT = getContainerForFixedLengthVector(VecVT);
10180 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10181 }
10182
10183 SDValue Mask, VL;
10184 if (IsVP) {
10185 Mask = Op.getOperand(2);
10186 VL = Op.getOperand(3);
10187 } else {
10188 std::tie(Mask, VL) =
10189 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10190 }
10191
10192 ISD::CondCode CC;
10193 switch (Op.getOpcode()) {
10194 default:
10195 llvm_unreachable("Unhandled reduction");
10196 case ISD::VECREDUCE_AND:
10197 case ISD::VP_REDUCE_AND: {
10198 // vcpop ~x == 0
10199 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10200 if (IsVP || VecVT.isFixedLengthVector())
10201 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
10202 else
10203 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
10204 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10205 CC = ISD::SETEQ;
10206 break;
10207 }
10208 case ISD::VECREDUCE_OR:
10209 case ISD::VP_REDUCE_OR:
10210 // vcpop x != 0
10211 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10212 CC = ISD::SETNE;
10213 break;
10214 case ISD::VECREDUCE_XOR:
10215 case ISD::VP_REDUCE_XOR: {
10216 // ((vcpop x) & 1) != 0
10217 SDValue One = DAG.getConstant(1, DL, XLenVT);
10218 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10219 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
10220 CC = ISD::SETNE;
10221 break;
10222 }
10223 }
10224
10225 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10226 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
10227 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
10228
10229 if (!IsVP)
10230 return SetCC;
10231
10232 // Now include the start value in the operation.
10233 // Note that we must return the start value when no elements are operated
10234 // upon. The vcpop instructions we've emitted in each case above will return
10235 // 0 for an inactive vector, and so we've already received the neutral value:
10236 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
10237 // can simply include the start value.
10238 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10239 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
10240}
10241
10242static bool isNonZeroAVL(SDValue AVL) {
10243 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
10244 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
10245 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
10246 (ImmAVL && ImmAVL->getZExtValue() >= 1);
10247}
10248
10249/// Helper to lower a reduction sequence of the form:
10250/// scalar = reduce_op vec, scalar_start
10251static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
10252 SDValue StartValue, SDValue Vec, SDValue Mask,
10253 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
10254 const RISCVSubtarget &Subtarget) {
10255 const MVT VecVT = Vec.getSimpleValueType();
10256 const MVT M1VT = getLMUL1VT(VecVT);
10257 const MVT XLenVT = Subtarget.getXLenVT();
10258 const bool NonZeroAVL = isNonZeroAVL(VL);
10259
10260 // The reduction needs an LMUL1 input; do the splat at either LMUL1
10261 // or the original VT if fractional.
10262 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
10263 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
10264 // prove it is non-zero. For the AVL=0 case, we need the scalar to
10265 // be the result of the reduction operation.
10266 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
10267 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
10268 DAG, Subtarget);
10269 if (M1VT != InnerVT)
10270 InitialValue =
10271 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
10272 InitialValue, DAG.getVectorIdxConstant(0, DL));
10273 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
10274 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10275 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
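  // RVV reductions take the accumulator in element 0 of an LMUL=1 vector and
  // produce the result in element 0, which is extracted below.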
10276 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
10277 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
10278 DAG.getVectorIdxConstant(0, DL));
10279}
10280
10281SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
10282 SelectionDAG &DAG) const {
10283 SDLoc DL(Op);
10284 SDValue Vec = Op.getOperand(0);
10285 EVT VecEVT = Vec.getValueType();
10286
10287 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10288
10289 // Due to ordering in legalize types we may have a vector type that needs to
10290 // be split. Do that manually so we can get down to a legal type.
10291 while (getTypeAction(*DAG.getContext(), VecEVT) ==
10292 TargetLowering::TypeSplitVector) {
10293 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10294 VecEVT = Lo.getValueType();
10295 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
10296 }
10297
10298 // TODO: The type may need to be widened rather than split. Or widened before
10299 // it can be split.
10300 if (!isTypeLegal(VecEVT))
10301 return SDValue();
10302
10303 MVT VecVT = VecEVT.getSimpleVT();
10304 MVT VecEltVT = VecVT.getVectorElementType();
10305 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
10306
10307 MVT ContainerVT = VecVT;
10308 if (VecVT.isFixedLengthVector()) {
10309 ContainerVT = getContainerForFixedLengthVector(VecVT);
10310 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10311 }
10312
10313 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10314
10315 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
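  // For the integer min/max/and/or reductions handled below, seeding the
  // reduction with element 0 of the vector avoids materializing the
  // neutral-element constant.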
10316 switch (BaseOpc) {
10317 case ISD::AND:
10318 case ISD::OR:
10319 case ISD::UMAX:
10320 case ISD::UMIN:
10321 case ISD::SMAX:
10322 case ISD::SMIN:
10323 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
10324 DAG.getVectorIdxConstant(0, DL));
10325 }
10326 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
10327 Mask, VL, DL, DAG, Subtarget);
10328}
10329
10330// Given a reduction op, this function returns the matching reduction opcode,
10331// the vector SDValue and the scalar SDValue required to lower this to a
10332// RISCVISD node.
10333static std::tuple<unsigned, SDValue, SDValue>
10334getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
10335 const RISCVSubtarget &Subtarget) {
10336 SDLoc DL(Op);
10337 auto Flags = Op->getFlags();
10338 unsigned Opcode = Op.getOpcode();
10339 switch (Opcode) {
10340 default:
10341 llvm_unreachable("Unhandled reduction");
10342 case ISD::VECREDUCE_FADD: {
10343 // Use positive zero if we can. It is cheaper to materialize.
10344 SDValue Zero =
10345 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
10346 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
10347 }
10348 case ISD::VECREDUCE_SEQ_FADD:
10349 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
10350 Op.getOperand(0));
10351 case ISD::VECREDUCE_FMINIMUM:
10352 case ISD::VECREDUCE_FMAXIMUM:
10353 case ISD::VECREDUCE_FMIN:
10354 case ISD::VECREDUCE_FMAX: {
10355 SDValue Front =
10356 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
10357 DAG.getVectorIdxConstant(0, DL));
10358 unsigned RVVOpc =
10359 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
10360 ? RISCVISD::VECREDUCE_FMIN_VL
10361 : RISCVISD::VECREDUCE_FMAX_VL;
10362 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
10363 }
10364 }
10365}
10366
10367SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
10368 SelectionDAG &DAG) const {
10369 SDLoc DL(Op);
10370 MVT VecEltVT = Op.getSimpleValueType();
10371
10372 unsigned RVVOpcode;
10373 SDValue VectorVal, ScalarVal;
10374 std::tie(RVVOpcode, VectorVal, ScalarVal) =
10375 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
10376 MVT VecVT = VectorVal.getSimpleValueType();
10377
10378 MVT ContainerVT = VecVT;
10379 if (VecVT.isFixedLengthVector()) {
10380 ContainerVT = getContainerForFixedLengthVector(VecVT);
10381 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
10382 }
10383
10384 MVT ResVT = Op.getSimpleValueType();
10385 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10386 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
10387 VL, DL, DAG, Subtarget);
10388 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
10389 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
10390 return Res;
10391
10392 if (Op->getFlags().hasNoNaNs())
10393 return Res;
10394
10395 // Force the output to NaN if any element is NaN. A SETNE self-compare is
10395 // true exactly for NaN lanes; vcpop then counts them.
10396 SDValue IsNan =
10397 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
10398 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
10399 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
10400 MVT XLenVT = Subtarget.getXLenVT();
10401 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
10402 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
10403 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10404 return DAG.getSelect(
10405 DL, ResVT, NoNaNs, Res,
10406 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10407}
10408
10409SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
10410 SelectionDAG &DAG) const {
10411 SDLoc DL(Op);
10412 unsigned Opc = Op.getOpcode();
10413 SDValue Start = Op.getOperand(0);
10414 SDValue Vec = Op.getOperand(1);
10415 EVT VecEVT = Vec.getValueType();
10416 MVT XLenVT = Subtarget.getXLenVT();
10417
10418 // TODO: The type may need to be widened rather than split. Or widened before
10419 // it can be split.
10420 if (!isTypeLegal(VecEVT))
10421 return SDValue();
10422
10423 MVT VecVT = VecEVT.getSimpleVT();
10424 unsigned RVVOpcode = getRVVReductionOp(Opc);
10425
10426 if (VecVT.isFixedLengthVector()) {
10427 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
10428 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10429 }
10430
10431 SDValue VL = Op.getOperand(3);
10432 SDValue Mask = Op.getOperand(2);
10433 SDValue Res =
10434 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
10435 Vec, Mask, VL, DL, DAG, Subtarget);
10436 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10437 Op->getFlags().hasNoNaNs())
10438 return Res;
10439
10440 // Propagate NaNs.
10441 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
10442 // Check if any of the elements in Vec is NaN.
10443 SDValue IsNaN = DAG.getNode(
10444 RISCVISD::SETCC_VL, DL, PredVT,
10445 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10446 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
10447 // Check if the start value is NaN.
10448 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
10449 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10450 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10451 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10452 MVT ResVT = Res.getSimpleValueType();
10453 return DAG.getSelect(
10454 DL, ResVT, NoNaNs, Res,
10455 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10456}
10457
10458SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10459 SelectionDAG &DAG) const {
10460 SDValue Vec = Op.getOperand(0);
10461 SDValue SubVec = Op.getOperand(1);
10462 MVT VecVT = Vec.getSimpleValueType();
10463 MVT SubVecVT = SubVec.getSimpleValueType();
10464
10465 SDLoc DL(Op);
10466 MVT XLenVT = Subtarget.getXLenVT();
10467 unsigned OrigIdx = Op.getConstantOperandVal(2);
10468 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10469
10470 if (OrigIdx == 0 && Vec.isUndef())
10471 return Op;
10472
10473 // We don't have the ability to slide mask vectors up indexed by their i1
10474 // elements; the smallest we can do is i8. Often we are able to bitcast to
10475 // equivalent i8 vectors. Note that when inserting a fixed-length vector
10476 // into a scalable one, we might not necessarily have enough scalable
10477 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
10478 if (SubVecVT.getVectorElementType() == MVT::i1) {
10479 if (VecVT.getVectorMinNumElements() >= 8 &&
10480 SubVecVT.getVectorMinNumElements() >= 8) {
10481 assert(OrigIdx % 8 == 0 && "Invalid index");
10482 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10483 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10484 "Unexpected mask vector lowering");
10485 OrigIdx /= 8;
10486 SubVecVT =
10487 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10488 SubVecVT.isScalableVector());
10489 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10490 VecVT.isScalableVector());
10491 Vec = DAG.getBitcast(VecVT, Vec);
10492 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10493 } else {
10494 // We can't slide this mask vector up indexed by its i1 elements.
10495 // This poses a problem when we wish to insert a scalable vector which
10496 // can't be re-expressed as a larger type. Just choose the slow path and
10497 // extend to a larger type, then truncate back down.
10498 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10499 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10500 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10501 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10502 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10503 Op.getOperand(2));
10504 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10505 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10506 }
10507 }
10508
10509 // If the subvector is a fixed-length type and we don't know VLEN
10510 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10511 // don't know which register of a LMUL group contains the specific subvector
10512 // as we only know the minimum register size. Therefore we must slide the
10513 // vector group up the full amount.
10514 const auto VLen = Subtarget.getRealVLen();
10515 if (SubVecVT.isFixedLengthVector() && !VLen) {
10516 MVT ContainerVT = VecVT;
10517 if (VecVT.isFixedLengthVector()) {
10518 ContainerVT = getContainerForFixedLengthVector(VecVT);
10519 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10520 }
10521
10522 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10523 DAG.getUNDEF(ContainerVT), SubVec,
10524 DAG.getVectorIdxConstant(0, DL));
10525
10526 SDValue Mask =
10527 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10528 // Set the vector length to only the number of elements we care about. Note
10529 // that for slideup this includes the offset.
10530 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10531 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
10532
10533 // Use tail agnostic policy if we're inserting over Vec's tail.
10534 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10535 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10536 Policy = RISCVII::TAIL_AGNOSTIC;
10537
10538 // If we're inserting into the lowest elements, use a tail undisturbed
10539 // vmv.v.v.
10540 if (OrigIdx == 0) {
10541 SubVec =
10542 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10543 } else {
10544 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10545 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10546 SlideupAmt, Mask, VL, Policy);
10547 }
10548
10549 if (VecVT.isFixedLengthVector())
10550 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10551 return DAG.getBitcast(Op.getValueType(), SubVec);
10552 }
10553
10554 MVT ContainerVecVT = VecVT;
10555 if (VecVT.isFixedLengthVector()) {
10556 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10557 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10558 }
10559
10560 MVT ContainerSubVecVT = SubVecVT;
10561 if (SubVecVT.isFixedLengthVector()) {
10562 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10563 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10564 }
10565
10566 unsigned SubRegIdx;
10567 ElementCount RemIdx;
10568 // insert_subvector scales the index by vscale if the subvector is scalable,
10569 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10570 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10571 if (SubVecVT.isFixedLengthVector()) {
10572 assert(VLen);
10573 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10574 auto Decompose =
10575 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10576 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10577 SubRegIdx = Decompose.first;
10578 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10579 (OrigIdx % Vscale));
10580 } else {
10581 auto Decompose =
10582 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10583 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10584 SubRegIdx = Decompose.first;
10585 RemIdx = ElementCount::getScalable(Decompose.second);
10586 }
10587
10588 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10589 assert(isPowerOf2_64(
10590 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10591 bool ExactlyVecRegSized =
10592 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10593 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10594
10595 // 1. If the Idx has been completely eliminated and this subvector's size is
10596 // a vector register or a multiple thereof, or the surrounding elements are
10597 // undef, then this is a subvector insert which naturally aligns to a vector
10598 // register. These can easily be handled using subregister manipulation.
10599 // 2. If the subvector isn't an exact multiple of a valid register group size,
10600 // then the insertion must preserve the undisturbed elements of the register.
10601 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10602 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10603 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10604 // of that LMUL=1 type back into the larger vector (resolving to another
10605 // subregister operation). See below for how our VSLIDEUP works. We go via a
10606 // LMUL=1 type to avoid allocating a large register group to hold our
10607 // subvector.
10608 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10609 if (SubVecVT.isFixedLengthVector()) {
10610 // We may get NoSubRegister if inserting at index 0 and the subvec
10611 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10612 if (SubRegIdx == RISCV::NoSubRegister) {
10613 assert(OrigIdx == 0);
10614 return Op;
10615 }
10616
10617 // Use a insert_subvector that will resolve to an insert subreg.
10618 assert(VLen);
10619 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10620 SDValue Insert =
10621 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10622 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10623 if (VecVT.isFixedLengthVector())
10624 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10625 return Insert;
10626 }
10627 return Op;
10628 }
10629
10630 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
10631 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10632 // (in our case undisturbed). This means we can set up a subvector insertion
10633 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10634 // size of the subvector.
10635 MVT InterSubVT = ContainerVecVT;
10636 SDValue AlignedExtract = Vec;
10637 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10638 if (SubVecVT.isFixedLengthVector()) {
10639 assert(VLen);
10640 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10641 }
10642 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10643 InterSubVT = getLMUL1VT(ContainerVecVT);
10644 // Extract a subvector equal to the nearest full vector register type. This
10645 // should resolve to a EXTRACT_SUBREG instruction.
10646 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10647 DAG.getVectorIdxConstant(AlignedIdx, DL));
10648 }
10649
10650 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10651 DAG.getUNDEF(InterSubVT), SubVec,
10652 DAG.getVectorIdxConstant(0, DL));
10653
10654 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10655
10656 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10657 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10658
10659 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10660 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10661 if (Subtarget.expandVScale(EndIndex) ==
10662 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10663 Policy = RISCVII::TAIL_AGNOSTIC;
10664
10665 // If we're inserting into the lowest elements, use a tail undisturbed
10666 // vmv.v.v.
10667 if (RemIdx.isZero()) {
10668 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10669 SubVec, VL);
10670 } else {
10671 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10672
10673 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10674 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10675
10676 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10677 SlideupAmt, Mask, VL, Policy);
10678 }
10679
10680 // If required, insert this subvector back into the correct vector register.
10681 // This should resolve to an INSERT_SUBREG instruction.
10682 if (ContainerVecVT.bitsGT(InterSubVT))
10683 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10684 DAG.getVectorIdxConstant(AlignedIdx, DL));
10685
10686 if (VecVT.isFixedLengthVector())
10687 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10688
10689 // We might have bitcast from a mask type: cast back to the original type if
10690 // required.
10691 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10692}
10693
10694SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10695 SelectionDAG &DAG) const {
10696 SDValue Vec = Op.getOperand(0);
10697 MVT SubVecVT = Op.getSimpleValueType();
10698 MVT VecVT = Vec.getSimpleValueType();
10699
10700 SDLoc DL(Op);
10701 MVT XLenVT = Subtarget.getXLenVT();
10702 unsigned OrigIdx = Op.getConstantOperandVal(1);
10703 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10704
10705 // With an index of 0 this is a cast-like subvector, which can be performed
10706 // with subregister operations.
10707 if (OrigIdx == 0)
10708 return Op;
10709
10710 // We don't have the ability to slide mask vectors down indexed by their i1
10711 // elements; the smallest we can do is i8. Often we are able to bitcast to
10712 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10713 // from a scalable one, we might not necessarily have enough scalable
10714 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10715 if (SubVecVT.getVectorElementType() == MVT::i1) {
10716 if (VecVT.getVectorMinNumElements() >= 8 &&
10717 SubVecVT.getVectorMinNumElements() >= 8) {
10718 assert(OrigIdx % 8 == 0 && "Invalid index");
10719 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10720 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10721 "Unexpected mask vector lowering");
10722 OrigIdx /= 8;
10723 SubVecVT =
10724 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10725 SubVecVT.isScalableVector());
10726 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10727 VecVT.isScalableVector());
10728 Vec = DAG.getBitcast(VecVT, Vec);
10729 } else {
10730 // We can't slide this mask vector down, indexed by its i1 elements.
10731 // This poses a problem when we wish to extract a scalable vector which
10732 // can't be re-expressed as a larger type. Just choose the slow path and
10733 // extend to a larger type, then truncate back down.
10734 // TODO: We could probably improve this when extracting certain fixed
10735 // from fixed, where we can extract as i8 and shift the correct element
10736 // right to reach the desired subvector?
10737 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10738 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10739 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10740 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10741 Op.getOperand(1));
10742 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10743 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10744 }
10745 }
10746
10747 const auto VLen = Subtarget.getRealVLen();
10748
10749 // If the subvector is a fixed-length type and we don't know VLEN
10750 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10751 // don't know which register of a LMUL group contains the specific subvector
10752 // as we only know the minimum register size. Therefore we must slide the
10753 // vector group down the full amount.
10754 if (SubVecVT.isFixedLengthVector() && !VLen) {
10755 MVT ContainerVT = VecVT;
10756 if (VecVT.isFixedLengthVector()) {
10757 ContainerVT = getContainerForFixedLengthVector(VecVT);
10758 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10759 }
10760
10761 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10762 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10763 if (auto ShrunkVT =
10764 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10765 ContainerVT = *ShrunkVT;
10766 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10767 DAG.getVectorIdxConstant(0, DL));
10768 }
10769
10770 SDValue Mask =
10771 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10772 // Set the vector length to only the number of elements we care about. This
10773 // avoids sliding down elements we're going to discard straight away.
10774 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10775 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10776 SDValue Slidedown =
10777 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10778 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10779 // Now we can use a cast-like subvector extract to get the result.
10780 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10781 DAG.getVectorIdxConstant(0, DL));
10782 return DAG.getBitcast(Op.getValueType(), Slidedown);
10783 }
10784
10785 if (VecVT.isFixedLengthVector()) {
10786 VecVT = getContainerForFixedLengthVector(VecVT);
10787 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10788 }
10789
10790 MVT ContainerSubVecVT = SubVecVT;
10791 if (SubVecVT.isFixedLengthVector())
10792 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10793
10794 unsigned SubRegIdx;
10795 ElementCount RemIdx;
10796 // extract_subvector scales the index by vscale if the subvector is scalable,
10797 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10798 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10799 if (SubVecVT.isFixedLengthVector()) {
10800 assert(VLen);
10801 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10802 auto Decompose =
10803 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10804 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10805 SubRegIdx = Decompose.first;
10806 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10807 (OrigIdx % Vscale));
10808 } else {
10809 auto Decompose =
10810 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10811 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10812 SubRegIdx = Decompose.first;
10813 RemIdx = ElementCount::getScalable(Decompose.second);
10814 }
10815
10816 // If the Idx has been completely eliminated then this is a subvector extract
10817 // which naturally aligns to a vector register. These can easily be handled
10818 // using subregister manipulation. We use an extract_subvector that will
10819 // resolve to an extract subreg.
10820 if (RemIdx.isZero()) {
10821 if (SubVecVT.isFixedLengthVector()) {
10822 assert(VLen);
10823 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10824 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
10825 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10826 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10827 }
10828 return Op;
10829 }
10830
10831 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10832 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10833 // divide exactly.
10834 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10835 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10836
10837 // If the vector type is an LMUL-group type, extract a subvector equal to the
10838 // nearest full vector register type.
10839 MVT InterSubVT = VecVT;
10840 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10841 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10842 // we should have successfully decomposed the extract into a subregister.
10843 // We use an extract_subvector that will resolve to a subreg extract.
10844 assert(SubRegIdx != RISCV::NoSubRegister);
10845 (void)SubRegIdx;
10846 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
10847 if (SubVecVT.isFixedLengthVector()) {
10848 assert(VLen);
10849 Idx /= *VLen / RISCV::RVVBitsPerBlock;
10850 }
10851 InterSubVT = getLMUL1VT(VecVT);
10852 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10853 DAG.getConstant(Idx, DL, XLenVT));
10854 }
10855
10856 // Slide this vector register down by the desired number of elements in order
10857 // to place the desired subvector starting at element 0.
10858 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10859 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10860 if (SubVecVT.isFixedLengthVector())
10861 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10862 SDValue Slidedown =
10863 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10864 Vec, SlidedownAmt, Mask, VL);
10865
10866 // Now the vector is in the right position, extract our final subvector. This
10867 // should resolve to a COPY.
10868 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10869 DAG.getVectorIdxConstant(0, DL));
10870
10871 // We might have bitcast from a mask type: cast back to the original type if
10872 // required.
10873 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10874}
10875
10876// Widen a vector's operands to i8, then truncate its results back to the
10877// original type, typically i1. All operand and result types must be the same.
10878static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10879 SelectionDAG &DAG) {
10880 MVT VT = N.getSimpleValueType();
10881 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10882 SmallVector<SDValue, 4> WideOps;
10883 for (SDValue Op : N->ops()) {
10884 assert(Op.getSimpleValueType() == VT &&
10885 "Operands and result must be same type");
10886 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10887 }
10888
10889 unsigned NumVals = N->getNumValues();
10890
10891 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10892 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10893 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
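  // Compare each widened result against zero to narrow it back to the
  // original i1 type.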
10894 SmallVector<SDValue, 4> TruncVals;
10895 for (unsigned I = 0; I < NumVals; I++) {
10896 TruncVals.push_back(
10897 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10898 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10899 }
10900
10901 if (TruncVals.size() > 1)
10902 return DAG.getMergeValues(TruncVals, DL);
10903 return TruncVals.front();
10904}
10905
10906SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10907 SelectionDAG &DAG) const {
10908 SDLoc DL(Op);
10909 MVT VecVT = Op.getSimpleValueType();
10910
10911 assert(VecVT.isScalableVector() &&
10912 "vector_interleave on non-scalable vector!");
10913
10914 // 1 bit element vectors need to be widened to e8
10915 if (VecVT.getVectorElementType() == MVT::i1)
10916 return widenVectorOpsToi8(Op, DL, DAG);
10917
10918 // If the VT is LMUL=8, we need to split and reassemble.
10919 if (VecVT.getSizeInBits().getKnownMinValue() ==
10920 (8 * RISCV::RVVBitsPerBlock)) {
10921 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10922 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10923 EVT SplitVT = Op0Lo.getValueType();
10924
10925 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10926 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10927 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10928 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10929
10930 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10931 ResLo.getValue(0), ResHi.getValue(0));
10932 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10933 ResHi.getValue(1));
10934 return DAG.getMergeValues({Even, Odd}, DL);
10935 }
10936
10937 // Concatenate the two vectors as one vector to deinterleave
10938 MVT ConcatVT =
10939 MVT::getVectorVT(VecVT.getVectorElementType(),
10940 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
10941 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
10942 Op.getOperand(0), Op.getOperand(1));
10943
10944 // We can deinterleave through vnsrl.wi if the element type is smaller than
10945 // ELEN
10946 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
10947 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
10948 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
10949 return DAG.getMergeValues({Even, Odd}, DL);
10950 }
10951
10952 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
10953 // possible mask vector, then extract the required subvector. Doing this
10954 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
10955 // creation to be rematerialized during register allocation to reduce
10956 // register pressure if needed.
10957
10958 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
10959
10960 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
10961 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
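  // Viewing the 0b01010101 byte splat as an i1 vector gives a mask with every
  // even lane set; OddSplat below does the same for the odd lanes.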
10962 SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, EvenSplat,
10963 DAG.getVectorIdxConstant(0, DL));
10964
10965 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
10966 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
10967 SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat,
10968 DAG.getVectorIdxConstant(0, DL));
10969
10970 // vcompress the even and odd elements into two separate vectors
10971 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
10972 EvenMask, DAG.getUNDEF(ConcatVT));
10973 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
10974 OddMask, DAG.getUNDEF(ConcatVT));
10975
10976 // Extract the result half of the gather for even and odd
10977 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
10978 DAG.getVectorIdxConstant(0, DL));
10979 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
10980 DAG.getVectorIdxConstant(0, DL));
10981
10982 return DAG.getMergeValues({Even, Odd}, DL);
10983}
10984
10985SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
10986 SelectionDAG &DAG) const {
10987 SDLoc DL(Op);
10988 MVT VecVT = Op.getSimpleValueType();
10989
10990 assert(VecVT.isScalableVector() &&
10991 "vector_interleave on non-scalable vector!");
10992
10993 // i1 vectors need to be widened to i8
10994 if (VecVT.getVectorElementType() == MVT::i1)
10995 return widenVectorOpsToi8(Op, DL, DAG);
10996
10997 MVT XLenVT = Subtarget.getXLenVT();
10998 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
10999
11000 // If the VT is LMUL=8, we need to split and reassemble.
11001 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
11002 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11003 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
11004 EVT SplitVT = Op0Lo.getValueType();
11005
11006 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11007 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
11008 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11009 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
11010
11011 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11012 ResLo.getValue(0), ResLo.getValue(1));
11013 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11014 ResHi.getValue(0), ResHi.getValue(1));
11015 return DAG.getMergeValues({Lo, Hi}, DL);
11016 }
11017
11018 SDValue Interleaved;
11019
11020 // If the element type is smaller than ELEN, then we can interleave with
11021 // vwaddu.vv and vwmaccu.vx
11022 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11023 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
11024 DAG, Subtarget);
11025 } else {
11026 // Otherwise, fall back to using vrgatherei16.vv
11027 MVT ConcatVT =
11028 MVT::getVectorVT(VecVT.getVectorElementType(),
11029 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11030 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11031 Op.getOperand(0), Op.getOperand(1));
11032
11033 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
11034
11035 // 0 1 2 3 4 5 6 7 ...
11036 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
11037
11038 // 1 1 1 1 1 1 1 1 ...
11039 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
11040
11041 // 1 0 1 0 1 0 1 0 ...
11042 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
11043 OddMask = DAG.getSetCC(
11044 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
11045 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
11046 ISD::SETNE);
11047
11048 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
11049
11050 // Build up the index vector for interleaving the concatenated vector
11051 // 0 0 1 1 2 2 3 3 ...
11052 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
11053 // 0 n 1 n+1 2 n+2 3 n+3 ...
11054 Idx =
11055 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
11056
11057 // Then perform the interleave
11058 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
11059 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
11060 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
11061 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
11062 }
11063
11064 // Extract the two halves from the interleaved result
11065 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11066 DAG.getVectorIdxConstant(0, DL));
11067 SDValue Hi = DAG.getNode(
11068 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11069 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
11070
11071 return DAG.getMergeValues({Lo, Hi}, DL);
11072}
11073
11074// Lower step_vector to the vid instruction. Any non-identity step value must
11075// be accounted for by manual expansion.
11076SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
11077 SelectionDAG &DAG) const {
11078 SDLoc DL(Op);
11079 MVT VT = Op.getSimpleValueType();
11080 assert(VT.isScalableVector() && "Expected scalable vector");
11081 MVT XLenVT = Subtarget.getXLenVT();
11082 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
11083 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11084 uint64_t StepValImm = Op.getConstantOperandVal(0);
11085 if (StepValImm != 1) {
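    // Scale vid.v by the step: a power-of-two step becomes a shift, anything
    // else a multiply by a splatted constant.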
11086 if (isPowerOf2_64(StepValImm)) {
11087 SDValue StepVal =
11088 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11089 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
11090 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
11091 } else {
11092 SDValue StepVal = lowerScalarSplat(
11093 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
11094 VL, VT, DL, DAG, Subtarget);
11095 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
11096 }
11097 }
11098 return StepVec;
11099}
11100
11101// Implement vector_reverse using vrgather.vv with indices determined by
11102// subtracting the id of each element from (VLMAX-1). This will convert
11103// the indices like so:
11104// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
11105// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11106SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
11107 SelectionDAG &DAG) const {
11108 SDLoc DL(Op);
11109 MVT VecVT = Op.getSimpleValueType();
11110 if (VecVT.getVectorElementType() == MVT::i1) {
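    // Reverse an i1 vector by widening to i8, reversing, and narrowing back
    // with a compare against zero.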
11111 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11112 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
11113 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
11114 return DAG.getSetCC(DL, VecVT, Op2,
11115 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
11116 }
11117
11118 MVT ContainerVT = VecVT;
11119 SDValue Vec = Op.getOperand(0);
11120 if (VecVT.isFixedLengthVector()) {
11121 ContainerVT = getContainerForFixedLengthVector(VecVT);
11122 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11123 }
11124
11125 MVT XLenVT = Subtarget.getXLenVT();
11126 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11127
11128 // On some uarchs vrgather.vv will read from every input register for each
11129 // output register, regardless of the indices. However, to reverse a vector,
11130 // each output register only needs to read from one register. So decompose it
11131 // into LMUL * M1 vrgather.vvs to get O(LMUL) performance instead of
11132 // O(LMUL^2).
11133 //
11134 // vsetvli a1, zero, e64, m4, ta, ma
11135 // vrgatherei16.vv v12, v8, v16
11136 // ->
11137 // vsetvli a1, zero, e64, m1, ta, ma
11138 // vrgather.vv v15, v8, v16
11139 // vrgather.vv v14, v9, v16
11140 // vrgather.vv v13, v10, v16
11141 // vrgather.vv v12, v11, v16
11142 if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
11143 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
11144 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11145 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
11146 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
11147 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
11148
11149 // Fixed length vectors might not fit exactly into their container, and so
11150 // leave a gap in the front of the vector after being reversed. Slide this
11151 // away.
11152 //
11153 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
11154 // 0 1 2 3 x x x x <- reverse
11155 // x x x x 0 1 2 3 <- vslidedown.vx
11156 if (VecVT.isFixedLengthVector()) {
11157 SDValue Offset = DAG.getNode(
11158 ISD::SUB, DL, XLenVT,
11159 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
11160 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
11161 Concat =
11162 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11163 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
11164 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
11165 }
11166 return Concat;
11167 }
11168
11169 unsigned EltSize = ContainerVT.getScalarSizeInBits();
11170 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
11171 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11172 unsigned MaxVLMAX =
11173 VecVT.isFixedLengthVector()
11174 ? VecVT.getVectorNumElements()
11175 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11176
11177 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11178 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
11179
11180 // If this is SEW=8 and VLMAX is potentially more than 256, we need
11181 // to use vrgatherei16.vv.
11182 if (MaxVLMAX > 256 && EltSize == 8) {
11183 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
11184 // Reverse each half, then reassemble them in reverse order.
11185 // NOTE: It's also possible that after splitting, VLMAX no longer
11186 // requires vrgatherei16.vv.
11187 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11188 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11189 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
11190 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11191 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11192 // Reassemble the low and high pieces reversed.
11193 // FIXME: This is a CONCAT_VECTORS.
11194 SDValue Res =
11195 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
11196 DAG.getVectorIdxConstant(0, DL));
11197 return DAG.getNode(
11198 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
11199 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
11200 }
11201
11202 // Just promote the int type to i16 which will double the LMUL.
11203 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
11204 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11205 }
11206
11207 // At LMUL > 1, do the index computation in 16 bits to reduce register
11208 // pressure.
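// For example, reversing an LMUL=4 vector of i64 elements would otherwise
// need an LMUL=4 vector of i64 indices; i16 indices for the same element
// count fit in a single LMUL=1 register.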
11209 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
11210 IntVT.bitsGT(getLMUL1VT(IntVT))) {
11211 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
11212 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11213 IntVT = IntVT.changeVectorElementType(MVT::i16);
11214 }
11215
11216 // Calculate VLMAX-1 for the desired SEW.
11217 SDValue VLMinus1 = DAG.getNode(
11218 ISD::SUB, DL, XLenVT,
11219 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
11220 DAG.getConstant(1, DL, XLenVT));
11221
11222 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
11223 bool IsRV32E64 =
11224 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
11225 SDValue SplatVL;
11226 if (!IsRV32E64)
11227 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
11228 else
11229 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
11230 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
11231
11232 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
11233 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
11234 DAG.getUNDEF(IntVT), Mask, VL);
11235
11236 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
11237 DAG.getUNDEF(ContainerVT), Mask, VL);
11238 if (VecVT.isFixedLengthVector())
11239 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
11240 return Gather;
11241}
11242
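// Lower VECTOR_SPLICE as a vslidedown of the first operand followed by a
// vslideup of the second operand. For example, splice(v1, v2, 2) slides v1
// down by 2 and then slides v2 up by VLMAX-2, yielding
// v1[2..VLMAX-1] ++ v2[0..1].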
11243SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
11244 SelectionDAG &DAG) const {
11245 SDLoc DL(Op);
11246 SDValue V1 = Op.getOperand(0);
11247 SDValue V2 = Op.getOperand(1);
11248 MVT XLenVT = Subtarget.getXLenVT();
11249 MVT VecVT = Op.getSimpleValueType();
11250
11251 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
11252
11253 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
11254 SDValue DownOffset, UpOffset;
11255 if (ImmValue >= 0) {
11256 // The operand is a TargetConstant; we need to rebuild it as a regular
11257 // constant.
11258 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11259 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
11260 } else {
11261 // The operand is a TargetConstant; we need to rebuild it as a regular
11262 // constant rather than negating the original operand.
11263 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11264 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
11265 }
11266
11267 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
11268
11269 SDValue SlideDown =
11270 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
11271 DownOffset, TrueMask, UpOffset);
11272 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
11273 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
11274 RISCVII::TAIL_AGNOSTIC);
11275}
11276
11277SDValue
11278RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
11279 SelectionDAG &DAG) const {
11280 SDLoc DL(Op);
11281 auto *Load = cast<LoadSDNode>(Op);
11282
11283 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11284 Load->getMemoryVT(),
11285 *Load->getMemOperand()) &&
11286 "Expecting a correctly-aligned load");
11287
11288 MVT VT = Op.getSimpleValueType();
11289 MVT XLenVT = Subtarget.getXLenVT();
11290 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11291
11292 // If we know the exact VLEN and our fixed length vector completely fills
11293 // the container, use a whole register load instead.
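// For example, with an exactly-known VLEN of 128 a v4i32 fixed vector fills
// its nxv2i32 container (MinVLMAX == MaxVLMAX == 4), so a plain load of the
// container type suffices.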
11294 const auto [MinVLMAX, MaxVLMAX] =
11295 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11296 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11297 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11298 MachineMemOperand *MMO = Load->getMemOperand();
11299 SDValue NewLoad =
11300 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
11301 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11302 MMO->getAAInfo(), MMO->getRanges());
11303 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11304 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11305 }
11306
11307 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11308
11309 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11310 SDValue IntID = DAG.getTargetConstant(
11311 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
11312 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
11313 if (!IsMaskOp)
11314 Ops.push_back(DAG.getUNDEF(ContainerVT));
11315 Ops.push_back(Load->getBasePtr());
11316 Ops.push_back(VL);
11317 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11318 SDValue NewLoad =
11319 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11320 Load->getMemoryVT(), Load->getMemOperand());
11321
11322 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11323 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11324}
11325
11326SDValue
11327RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
11328 SelectionDAG &DAG) const {
11329 SDLoc DL(Op);
11330 auto *Store = cast<StoreSDNode>(Op);
11331
11332 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11333 Store->getMemoryVT(),
11334 *Store->getMemOperand()) &&
11335 "Expecting a correctly-aligned store");
11336
11337 SDValue StoreVal = Store->getValue();
11338 MVT VT = StoreVal.getSimpleValueType();
11339 MVT XLenVT = Subtarget.getXLenVT();
11340
11341 // If the size is less than a byte, we need to pad with zeros to make a byte.
11342 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11343 VT = MVT::v8i1;
11344 StoreVal =
11345 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
11346 StoreVal, DAG.getVectorIdxConstant(0, DL));
11347 }
11348
11349 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11350
11351 SDValue NewValue =
11352 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11353
11354 // If we know the exact VLEN and our fixed length vector completely fills
11355 // the container, use a whole register store instead.
11356 const auto [MinVLMAX, MaxVLMAX] =
11357 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11358 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11359 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11360 MachineMemOperand *MMO = Store->getMemOperand();
11361 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
11362 MMO->getPointerInfo(), MMO->getBaseAlign(),
11363 MMO->getFlags(), MMO->getAAInfo());
11364 }
11365
11366 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11367
11368 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11369 SDValue IntID = DAG.getTargetConstant(
11370 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
11371 return DAG.getMemIntrinsicNode(
11372 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
11373 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11374 Store->getMemoryVT(), Store->getMemOperand());
11375}
11376
11377SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11378 SelectionDAG &DAG) const {
11379 SDLoc DL(Op);
11380 MVT VT = Op.getSimpleValueType();
11381
11382 const auto *MemSD = cast<MemSDNode>(Op);
11383 EVT MemVT = MemSD->getMemoryVT();
11384 MachineMemOperand *MMO = MemSD->getMemOperand();
11385 SDValue Chain = MemSD->getChain();
11386 SDValue BasePtr = MemSD->getBasePtr();
11387
11388 SDValue Mask, PassThru, VL;
11389 bool IsExpandingLoad = false;
11390 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11391 Mask = VPLoad->getMask();
11392 PassThru = DAG.getUNDEF(VT);
11393 VL = VPLoad->getVectorLength();
11394 } else {
11395 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
11396 Mask = MLoad->getMask();
11397 PassThru = MLoad->getPassThru();
11398 IsExpandingLoad = MLoad->isExpandingLoad();
11399 }
11400
11401 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11402
11403 MVT XLenVT = Subtarget.getXLenVT();
11404
11405 MVT ContainerVT = VT;
11406 if (VT.isFixedLengthVector()) {
11407 ContainerVT = getContainerForFixedLengthVector(VT);
11408 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11409 if (!IsUnmasked) {
11410 MVT MaskVT = getMaskTypeFor(ContainerVT);
11411 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11412 }
11413 }
11414
11415 if (!VL)
11416 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11417
11418 SDValue ExpandingVL;
11419 if (!IsUnmasked && IsExpandingLoad) {
11420 ExpandingVL = VL;
11421 VL =
11422 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11423 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11424 }
11425
11426 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11427 : Intrinsic::riscv_vle_mask;
11428 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11429 if (IntID == Intrinsic::riscv_vle)
11430 Ops.push_back(DAG.getUNDEF(ContainerVT));
11431 else
11432 Ops.push_back(PassThru);
11433 Ops.push_back(BasePtr);
11434 if (IntID == Intrinsic::riscv_vle_mask)
11435 Ops.push_back(Mask);
11436 Ops.push_back(VL);
11437 if (IntID == Intrinsic::riscv_vle_mask)
11438 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11439
11440 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11441
11442 SDValue Result =
11443 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11444 Chain = Result.getValue(1);
11445 if (ExpandingVL) {
11446 MVT IndexVT = ContainerVT;
11447 if (ContainerVT.isFloatingPoint())
11448 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11449
11450 MVT IndexEltVT = IndexVT.getVectorElementType();
11451 bool UseVRGATHEREI16 = false;
11452 // If the index vector is an i8 vector and the element count exceeds 256, we
11453 // should change the element type of the index vector to i16 to avoid
11454 // overflow.
11455 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11456 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11457 assert(getLMUL(IndexVT) != RISCVII::LMUL_8);
11458 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11459 UseVRGATHEREI16 = true;
11460 }
11461
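// For example, with Mask = {1,0,1,1} the load above reads vcpop(Mask) = 3
// contiguous elements {a,b,c}; viota(Mask) = {0,1,1,2}, so the gather below
// places a, b and c into the active lanes while inactive lanes keep the
// passthru value.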
11462 SDValue Iota =
11463 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11464 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11465 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11466 Result =
11467 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11468 : RISCVISD::VRGATHER_VV_VL,
11469 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
11470 }
11471
11472 if (VT.isFixedLengthVector())
11473 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11474
11475 return DAG.getMergeValues({Result, Chain}, DL);
11476}
11477
11478SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
11479 SelectionDAG &DAG) const {
11480 SDLoc DL(Op);
11481
11482 const auto *MemSD = cast<MemSDNode>(Op);
11483 EVT MemVT = MemSD->getMemoryVT();
11484 MachineMemOperand *MMO = MemSD->getMemOperand();
11485 SDValue Chain = MemSD->getChain();
11486 SDValue BasePtr = MemSD->getBasePtr();
11487 SDValue Val, Mask, VL;
11488
11489 bool IsCompressingStore = false;
11490 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11491 Val = VPStore->getValue();
11492 Mask = VPStore->getMask();
11493 VL = VPStore->getVectorLength();
11494 } else {
11495 const auto *MStore = cast<MaskedStoreSDNode>(Op);
11496 Val = MStore->getValue();
11497 Mask = MStore->getMask();
11498 IsCompressingStore = MStore->isCompressingStore();
11499 }
11500
11501 bool IsUnmasked =
11502 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11503
11504 MVT VT = Val.getSimpleValueType();
11505 MVT XLenVT = Subtarget.getXLenVT();
11506
11507 MVT ContainerVT = VT;
11508 if (VT.isFixedLengthVector()) {
11509 ContainerVT = getContainerForFixedLengthVector(VT);
11510
11511 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11512 if (!IsUnmasked || IsCompressingStore) {
11513 MVT MaskVT = getMaskTypeFor(ContainerVT);
11514 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11515 }
11516 }
11517
11518 if (!VL)
11519 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11520
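// For example, compressing {a,b,c,d} with Mask = {1,0,1,1} yields {a,c,d},
// and vcpop(Mask) = 3 elements are then stored contiguously.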
11521 if (IsCompressingStore) {
11522 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11523 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11524 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
11525 VL =
11526 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11527 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11528 }
11529
11530 unsigned IntID =
11531 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11532 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11533 Ops.push_back(Val);
11534 Ops.push_back(BasePtr);
11535 if (!IsUnmasked)
11536 Ops.push_back(Mask);
11537 Ops.push_back(VL);
11538
11539 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11540 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11541}
11542
11543SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
11544 SelectionDAG &DAG) const {
11545 SDLoc DL(Op);
11546 SDValue Val = Op.getOperand(0);
11547 SDValue Mask = Op.getOperand(1);
11548 SDValue Passthru = Op.getOperand(2);
11549
11550 MVT VT = Val.getSimpleValueType();
11551 MVT XLenVT = Subtarget.getXLenVT();
11552 MVT ContainerVT = VT;
11553 if (VT.isFixedLengthVector()) {
11554 ContainerVT = getContainerForFixedLengthVector(VT);
11555 MVT MaskVT = getMaskTypeFor(ContainerVT);
11556 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11557 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11558 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
11559 }
11560
11561 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11562 SDValue Res =
11563 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11564 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11565 Passthru, Val, Mask, VL);
11566
11567 if (VT.isFixedLengthVector())
11568 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
11569
11570 return Res;
11571}
11572
11573SDValue
11574RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
11575 SelectionDAG &DAG) const {
11576 MVT InVT = Op.getOperand(0).getSimpleValueType();
11577 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
11578
11579 MVT VT = Op.getSimpleValueType();
11580
11581 SDValue Op1 =
11582 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
11583 SDValue Op2 =
11584 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11585
11586 SDLoc DL(Op);
11587 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11588 DAG, Subtarget);
11589 MVT MaskVT = getMaskTypeFor(ContainerVT);
11590
11591 SDValue Cmp =
11592 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11593 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11594
11595 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11596}
11597
11598SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11599 SelectionDAG &DAG) const {
11600 unsigned Opc = Op.getOpcode();
11601 SDLoc DL(Op);
11602 SDValue Chain = Op.getOperand(0);
11603 SDValue Op1 = Op.getOperand(1);
11604 SDValue Op2 = Op.getOperand(2);
11605 SDValue CC = Op.getOperand(3);
11606 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11607 MVT VT = Op.getSimpleValueType();
11608 MVT InVT = Op1.getSimpleValueType();
11609
11610 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
11611 // condition code.
11612 if (Opc == ISD::STRICT_FSETCCS) {
11613 // Expand strict_fsetccs(x, y, oeq) to
11614 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
11615 SDVTList VTList = Op->getVTList();
11616 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
11617 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
11618 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11619 Op2, OLECCVal);
11620 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
11621 Op1, OLECCVal);
11622 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11623 Tmp1.getValue(1), Tmp2.getValue(1));
11624 // Tmp1 and Tmp2 might be the same node.
11625 if (Tmp1 != Tmp2)
11626 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11627 return DAG.getMergeValues({Tmp1, OutChain}, DL);
11628 }
11629
11630 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11631 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11632 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11633 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11634 Op2, OEQCCVal);
11635 SDValue Res = DAG.getNOT(DL, OEQ, VT);
11636 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11637 }
11638 }
11639
11640 MVT ContainerInVT = InVT;
11641 if (InVT.isFixedLengthVector()) {
11642 ContainerInVT = getContainerForFixedLengthVector(InVT);
11643 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11644 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11645 }
11646 MVT MaskVT = getMaskTypeFor(ContainerInVT);
11647
11648 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11649
11650 SDValue Res;
11651 if (Opc == ISD::STRICT_FSETCC &&
11652 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11653 CCVal == ISD::SETOLE)) {
11654 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that is
11655 // only active when both input elements are ordered.
11656 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
11657 SDValue OrderMask1 = DAG.getNode(
11658 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11659 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11660 True, VL});
11661 SDValue OrderMask2 = DAG.getNode(
11662 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11663 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11664 True, VL});
11665 Mask =
11666 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11667 // Use Mask as the passthru operand to let the result be 0 if either of the
11668 // inputs is unordered.
11669 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
11670 DAG.getVTList(MaskVT, MVT::Other),
11671 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11672 } else {
11673 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11674 : RISCVISD::STRICT_FSETCCS_VL;
11675 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11676 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11677 }
11678
11679 if (VT.isFixedLengthVector()) {
11680 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11681 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11682 }
11683 return Res;
11684}
11685
11686// Lower vector ABS to smax(X, sub(0, X)).
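// For example, for an element x = -5: smax(-5, sub(0, -5)) = smax(-5, 5) = 5.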
11687SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
11688 SDLoc DL(Op);
11689 MVT VT = Op.getSimpleValueType();
11690 SDValue X = Op.getOperand(0);
11691
11692 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11693 "Unexpected type for ISD::ABS");
11694
11695 MVT ContainerVT = VT;
11696 if (VT.isFixedLengthVector()) {
11697 ContainerVT = getContainerForFixedLengthVector(VT);
11698 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
11699 }
11700
11701 SDValue Mask, VL;
11702 if (Op->getOpcode() == ISD::VP_ABS) {
11703 Mask = Op->getOperand(1);
11704 if (VT.isFixedLengthVector())
11705 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11706 Subtarget);
11707 VL = Op->getOperand(2);
11708 } else
11709 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11710
11711 SDValue SplatZero = DAG.getNode(
11712 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11713 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11714 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11715 DAG.getUNDEF(ContainerVT), Mask, VL);
11716 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11717 DAG.getUNDEF(ContainerVT), Mask, VL);
11718
11719 if (VT.isFixedLengthVector())
11720 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11721 return Max;
11722}
11723
11724SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11725 SDValue Op, SelectionDAG &DAG) const {
11726 SDLoc DL(Op);
11727 MVT VT = Op.getSimpleValueType();
11728 SDValue Mag = Op.getOperand(0);
11729 SDValue Sign = Op.getOperand(1);
11730 assert(Mag.getValueType() == Sign.getValueType() &&
11731 "Can only handle COPYSIGN with matching types.");
11732
11733 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11734 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11735 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11736
11737 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11738
11739 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11740 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11741
11742 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11743}
11744
11745SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11746 SDValue Op, SelectionDAG &DAG) const {
11747 MVT VT = Op.getSimpleValueType();
11748 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11749
11750 MVT I1ContainerVT =
11751 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11752
11753 SDValue CC =
11754 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11755 SDValue Op1 =
11756 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11757 SDValue Op2 =
11758 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11759
11760 SDLoc DL(Op);
11761 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11762
11763 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11764 Op2, DAG.getUNDEF(ContainerVT), VL);
11765
11766 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11767}
11768
11769SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11770 SelectionDAG &DAG) const {
11771 unsigned NewOpc = getRISCVVLOp(Op);
11772 bool HasPassthruOp = hasPassthruOp(NewOpc);
11773 bool HasMask = hasMaskOp(NewOpc);
11774
11775 MVT VT = Op.getSimpleValueType();
11776 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11777
11778 // Create list of operands by converting existing ones to scalable types.
11779 SmallVector<SDValue, 6> Ops;
11780 for (const SDValue &V : Op->op_values()) {
11781 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11782
11783 // Pass through non-vector operands.
11784 if (!V.getValueType().isVector()) {
11785 Ops.push_back(V);
11786 continue;
11787 }
11788
11789 // "cast" fixed length vector to a scalable vector.
11790 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11791 "Only fixed length vectors are supported!");
11792 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11793 }
11794
11795 SDLoc DL(Op);
11796 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11797 if (HasPassthruOp)
11798 Ops.push_back(DAG.getUNDEF(ContainerVT));
11799 if (HasMask)
11800 Ops.push_back(Mask);
11801 Ops.push_back(VL);
11802
11803 // StrictFP operations have two result values. Their lowered result should
11804 // have the same result count.
11805 if (Op->isStrictFPOpcode()) {
11806 SDValue ScalableRes =
11807 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11808 Op->getFlags());
11809 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11810 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11811 }
11812
11813 SDValue ScalableRes =
11814 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11815 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11816}
11817
11818// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11819// * Operands of each node are assumed to be in the same order.
11820// * The EVL operand is promoted from i32 to i64 on RV64.
11821// * Fixed-length vectors are converted to their scalable-vector container
11822// types.
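// For example (illustrative, assuming the default 128-bit minimum VLEN), a
// vp.add on v4i32 becomes RISCVISD::ADD_VL on the nxv2i32 container: the
// fixed-length operands are converted to scalable vectors, an undef passthru
// is inserted, and the original mask and EVL become the trailing operands.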
11823SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11824 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11825 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
11826
11827 SDLoc DL(Op);
11828 MVT VT = Op.getSimpleValueType();
11830
11831 MVT ContainerVT = VT;
11832 if (VT.isFixedLengthVector())
11833 ContainerVT = getContainerForFixedLengthVector(VT);
11834
11835 for (const auto &OpIdx : enumerate(Op->ops())) {
11836 SDValue V = OpIdx.value();
11837 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11838 // Add dummy passthru value before the mask. Or if there isn't a mask,
11839 // before EVL.
11840 if (HasPassthruOp) {
11841 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11842 if (MaskIdx) {
11843 if (*MaskIdx == OpIdx.index())
11844 Ops.push_back(DAG.getUNDEF(ContainerVT));
11845 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11846 OpIdx.index()) {
11847 if (Op.getOpcode() == ISD::VP_MERGE) {
11848 // For VP_MERGE, copy the false operand instead of an undef value.
11849 Ops.push_back(Ops.back());
11850 } else {
11851 assert(Op.getOpcode() == ISD::VP_SELECT);
11852 // For VP_SELECT, add an undef value.
11853 Ops.push_back(DAG.getUNDEF(ContainerVT));
11854 }
11855 }
11856 }
11857 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
11858 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
11859 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
11860 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
11861 Subtarget.getXLenVT()));
11862 // Pass through operands which aren't fixed-length vectors.
11863 if (!V.getValueType().isFixedLengthVector()) {
11864 Ops.push_back(V);
11865 continue;
11866 }
11867 // "cast" fixed length vector to a scalable vector.
11868 MVT OpVT = V.getSimpleValueType();
11869 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11870 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11871 "Only fixed length vectors are supported!");
11872 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11873 }
11874
11875 if (!VT.isFixedLengthVector())
11876 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11877
11878 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11879
11880 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11881}
11882
11883SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11884 SelectionDAG &DAG) const {
11885 SDLoc DL(Op);
11886 MVT VT = Op.getSimpleValueType();
11887
11888 SDValue Src = Op.getOperand(0);
11889 // NOTE: Mask is dropped.
11890 SDValue VL = Op.getOperand(2);
11891
11892 MVT ContainerVT = VT;
11893 if (VT.isFixedLengthVector()) {
11894 ContainerVT = getContainerForFixedLengthVector(VT);
11895 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11896 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11897 }
11898
11899 MVT XLenVT = Subtarget.getXLenVT();
11900 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11901 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11902 DAG.getUNDEF(ContainerVT), Zero, VL);
11903
11904 SDValue SplatValue = DAG.getSignedConstant(
11905 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11906 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11907 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11908
11909 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11910 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11911 if (!VT.isFixedLengthVector())
11912 return Result;
11913 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11914}
11915
11916SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11917 SelectionDAG &DAG) const {
11918 SDLoc DL(Op);
11919 MVT VT = Op.getSimpleValueType();
11920
11921 SDValue Op1 = Op.getOperand(0);
11922 SDValue Op2 = Op.getOperand(1);
11923 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11924 // NOTE: Mask is dropped.
11925 SDValue VL = Op.getOperand(4);
11926
11927 MVT ContainerVT = VT;
11928 if (VT.isFixedLengthVector()) {
11929 ContainerVT = getContainerForFixedLengthVector(VT);
11930 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11931 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11932 }
11933
11934 SDValue Result;
11935 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11936
11937 switch (Condition) {
11938 default:
11939 break;
11940 // X != Y --> (X^Y)
11941 case ISD::SETNE:
11942 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11943 break;
11944 // X == Y --> ~(X^Y)
11945 case ISD::SETEQ: {
11946 SDValue Temp =
11947 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
11948 Result =
11949 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
11950 break;
11951 }
11952 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
11953 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
11954 case ISD::SETGT:
11955 case ISD::SETULT: {
11956 SDValue Temp =
11957 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11958 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
11959 break;
11960 }
11961 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
11962 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
11963 case ISD::SETLT:
11964 case ISD::SETUGT: {
11965 SDValue Temp =
11966 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11967 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
11968 break;
11969 }
11970 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
11971 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
11972 case ISD::SETGE:
11973 case ISD::SETULE: {
11974 SDValue Temp =
11975 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
11976 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
11977 break;
11978 }
11979 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
11980 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
11981 case ISD::SETLE:
11982 case ISD::SETUGE: {
11983 SDValue Temp =
11984 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
11985 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
11986 break;
11987 }
11988 }
11989
11990 if (!VT.isFixedLengthVector())
11991 return Result;
11992 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11993}
11994
11995// Lower Floating-Point/Integer Type-Convert VP SDNodes
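// For example, a vp.sitofp from i8 to f64 is widened first (i8 -> i32 via
// vsext, then converted to f64), while a vp.fptosi from f64 to i8 is narrowed
// in stages (convert f64 -> i32, then truncate i32 -> i16 -> i8).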
11996SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
11997 SelectionDAG &DAG) const {
11998 SDLoc DL(Op);
11999
12000 SDValue Src = Op.getOperand(0);
12001 SDValue Mask = Op.getOperand(1);
12002 SDValue VL = Op.getOperand(2);
12003 unsigned RISCVISDOpc = getRISCVVLOp(Op);
12004
12005 MVT DstVT = Op.getSimpleValueType();
12006 MVT SrcVT = Src.getSimpleValueType();
12007 if (DstVT.isFixedLengthVector()) {
12008 DstVT = getContainerForFixedLengthVector(DstVT);
12009 SrcVT = getContainerForFixedLengthVector(SrcVT);
12010 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
12011 MVT MaskVT = getMaskTypeFor(DstVT);
12012 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12013 }
12014
12015 unsigned DstEltSize = DstVT.getScalarSizeInBits();
12016 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
12017
12018 SDValue Result;
12019 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
12020 if (SrcVT.isInteger()) {
12021 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12022
12023 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
12024 ? RISCVISD::VSEXT_VL
12025 : RISCVISD::VZEXT_VL;
12026
12027 // Do we need to do any pre-widening before converting?
12028 if (SrcEltSize == 1) {
12029 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
12030 MVT XLenVT = Subtarget.getXLenVT();
12031 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
12032 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12033 DAG.getUNDEF(IntVT), Zero, VL);
12034 SDValue One = DAG.getSignedConstant(
12035 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
12036 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12037 DAG.getUNDEF(IntVT), One, VL);
12038 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
12039 ZeroSplat, DAG.getUNDEF(IntVT), VL);
12040 } else if (DstEltSize > (2 * SrcEltSize)) {
12041 // Widen before converting.
12042 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
12043 DstVT.getVectorElementCount());
12044 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
12045 }
12046
12047 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12048 } else {
12049 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12050 "Wrong input/output vector types");
12051
12052 // Convert f16 to f32 then convert f32 to i64.
12053 if (DstEltSize > (2 * SrcEltSize)) {
12054 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12055 MVT InterimFVT =
12056 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12057 Src =
12058 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
12059 }
12060
12061 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12062 }
12063 } else { // Narrowing + Conversion
12064 if (SrcVT.isInteger()) {
12065 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12066 // First do a narrowing conversion to an FP type half the size, then round
12067 // the FP type to a smaller FP type if needed.
12068
12069 MVT InterimFVT = DstVT;
12070 if (SrcEltSize > (2 * DstEltSize)) {
12071 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
12072 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12073 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12074 }
12075
12076 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
12077
12078 if (InterimFVT != DstVT) {
12079 Src = Result;
12080 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
12081 }
12082 } else {
12083 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12084 "Wrong input/output vector types");
12085 // First do a narrowing conversion to an integer half the size, then
12086 // truncate if needed.
12087
12088 if (DstEltSize == 1) {
12089 // First convert to the same size integer, then convert to mask using
12090 // setcc.
12091 assert(SrcEltSize >= 16 && "Unexpected FP type!");
12092 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
12093 DstVT.getVectorElementCount());
12094 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12095
12096 // Compare the integer result to 0. The integer should be 0 or 1/-1,
12097 // otherwise the conversion was undefined.
12098 MVT XLenVT = Subtarget.getXLenVT();
12099 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
12100 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
12101 DAG.getUNDEF(InterimIVT), SplatZero, VL);
12102 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
12103 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
12104 DAG.getUNDEF(DstVT), Mask, VL});
12105 } else {
12106 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12107 DstVT.getVectorElementCount());
12108
12109 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12110
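// For an f64 -> i8 conversion the convert above produces i32 elements; the
// loop below then truncates i32 -> i16 -> i8 one halving at a time.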
12111 while (InterimIVT != DstVT) {
12112 SrcEltSize /= 2;
12113 Src = Result;
12114 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12115 DstVT.getVectorElementCount());
12116 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
12117 Src, Mask, VL);
12118 }
12119 }
12120 }
12121 }
12122
12123 MVT VT = Op.getSimpleValueType();
12124 if (!VT.isFixedLengthVector())
12125 return Result;
12126 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12127}
12128
12129SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
12130 SelectionDAG &DAG) const {
12131 SDLoc DL(Op);
12132 MVT VT = Op.getSimpleValueType();
12133 MVT XLenVT = Subtarget.getXLenVT();
12134
12135 SDValue Mask = Op.getOperand(0);
12136 SDValue TrueVal = Op.getOperand(1);
12137 SDValue FalseVal = Op.getOperand(2);
12138 SDValue VL = Op.getOperand(3);
12139
12140 // Use default legalization if a vector of EVL type would be legal.
12141 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
12142 VT.getVectorElementCount());
12143 if (isTypeLegal(EVLVecVT))
12144 return SDValue();
12145
12146 MVT ContainerVT = VT;
12147 if (VT.isFixedLengthVector()) {
12148 ContainerVT = getContainerForFixedLengthVector(VT);
12149 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
12150 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
12151 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
12152 }
12153
12154 // Promote to a vector of i8.
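// Promoting to 0/1 i8 vectors lets a data-register vmerge honour the EVL
// tail semantics; the result is turned back into a mask by the compare
// against zero below.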
12155 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
12156
12157 // Promote TrueVal and FalseVal using VLMax.
12158 // FIXME: Is there a better way to do this?
12159 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
12160 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12161 DAG.getUNDEF(PromotedVT),
12162 DAG.getConstant(1, DL, XLenVT), VLMax);
12163 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12164 DAG.getUNDEF(PromotedVT),
12165 DAG.getConstant(0, DL, XLenVT), VLMax);
12166 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
12167 SplatZero, DAG.getUNDEF(PromotedVT), VL);
12168 // Any element past VL uses FalseVal, so use VLMax
12169 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
12170 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
12171
12172 // VP_MERGE the two promoted values.
12173 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
12174 TrueVal, FalseVal, FalseVal, VL);
12175
12176 // Convert back to mask.
12177 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12178 SDValue Result = DAG.getNode(
12179 RISCVISD::SETCC_VL, DL, ContainerVT,
12180 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
12181 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
12182
12183 if (VT.isFixedLengthVector())
12184 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12185 return Result;
12186}
12187
12188SDValue
12189RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
12190 SelectionDAG &DAG) const {
12191 SDLoc DL(Op);
12192
12193 SDValue Op1 = Op.getOperand(0);
12194 SDValue Op2 = Op.getOperand(1);
12195 SDValue Offset = Op.getOperand(2);
12196 SDValue Mask = Op.getOperand(3);
12197 SDValue EVL1 = Op.getOperand(4);
12198 SDValue EVL2 = Op.getOperand(5);
12199
12200 const MVT XLenVT = Subtarget.getXLenVT();
12201 MVT VT = Op.getSimpleValueType();
12202 MVT ContainerVT = VT;
12203 if (VT.isFixedLengthVector()) {
12204 ContainerVT = getContainerForFixedLengthVector(VT);
12205 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12206 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12207 MVT MaskVT = getMaskTypeFor(ContainerVT);
12208 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12209 }
12210
12211 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
12212 if (IsMaskVector) {
12213 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
12214
12215 // Expand input operands
12216 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12217 DAG.getUNDEF(ContainerVT),
12218 DAG.getConstant(1, DL, XLenVT), EVL1);
12219 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12220 DAG.getUNDEF(ContainerVT),
12221 DAG.getConstant(0, DL, XLenVT), EVL1);
12222 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
12223 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
12224
12225 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12226 DAG.getUNDEF(ContainerVT),
12227 DAG.getConstant(1, DL, XLenVT), EVL2);
12228 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12229 DAG.getUNDEF(ContainerVT),
12230 DAG.getConstant(0, DL, XLenVT), EVL2);
12231 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
12232 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
12233 }
12234
12235 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
12236 SDValue DownOffset, UpOffset;
12237 if (ImmValue >= 0) {
12238 // The operand is a TargetConstant; we need to rebuild it as a regular
12239 // constant.
12240 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12241 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
12242 } else {
12243 // The operand is a TargetConstant; we need to rebuild it as a regular
12244 // constant rather than negating the original operand.
12245 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12246 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
12247 }
12248
12249 SDValue SlideDown =
12250 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12251 Op1, DownOffset, Mask, UpOffset);
12252 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
12253 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
12254
12255 if (IsMaskVector) {
12256 // Truncate Result back to a mask vector (Result has same EVL as Op2)
12257 Result = DAG.getNode(
12258 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
12259 {Result, DAG.getConstant(0, DL, ContainerVT),
12260 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12261 Mask, EVL2});
12262 }
12263
12264 if (!VT.isFixedLengthVector())
12265 return Result;
12266 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12267}
12268
12269SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
12270 SelectionDAG &DAG) const {
12271 SDLoc DL(Op);
12272 SDValue Val = Op.getOperand(0);
12273 SDValue Mask = Op.getOperand(1);
12274 SDValue VL = Op.getOperand(2);
12275 MVT VT = Op.getSimpleValueType();
12276
12277 MVT ContainerVT = VT;
12278 if (VT.isFixedLengthVector()) {
12279 ContainerVT = getContainerForFixedLengthVector(VT);
12280 MVT MaskVT = getMaskTypeFor(ContainerVT);
12281 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12282 }
12283
12284 SDValue Result =
12285 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12286
12287 if (!VT.isFixedLengthVector())
12288 return Result;
12289 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12290}
12291
12292SDValue
12293RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
12294 SelectionDAG &DAG) const {
12295 SDLoc DL(Op);
12296 MVT VT = Op.getSimpleValueType();
12297 MVT XLenVT = Subtarget.getXLenVT();
12298
12299 SDValue Op1 = Op.getOperand(0);
12300 SDValue Mask = Op.getOperand(1);
12301 SDValue EVL = Op.getOperand(2);
12302
12303 MVT ContainerVT = VT;
12304 if (VT.isFixedLengthVector()) {
12305 ContainerVT = getContainerForFixedLengthVector(VT);
12306 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12307 MVT MaskVT = getMaskTypeFor(ContainerVT);
12308 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12309 }
12310
12311 MVT GatherVT = ContainerVT;
12312 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
12313 // Check if we are working with mask vectors
12314 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
12315 if (IsMaskVector) {
12316 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
12317
12318 // Expand input operand
12319 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12320 DAG.getUNDEF(IndicesVT),
12321 DAG.getConstant(1, DL, XLenVT), EVL);
12322 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12323 DAG.getUNDEF(IndicesVT),
12324 DAG.getConstant(0, DL, XLenVT), EVL);
12325 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
12326 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
12327 }
12328
12329 unsigned EltSize = GatherVT.getScalarSizeInBits();
12330 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
12331 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12332 unsigned MaxVLMAX =
12333 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12334
12335 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12336 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
12337 // to use vrgatherei16.vv.
12338 // TODO: It's also possible to use vrgatherei16.vv for other types to
12339 // decrease register width for the index calculation.
12340 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12341 if (MaxVLMAX > 256 && EltSize == 8) {
12342 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
12343 // Split the vector in half and reverse each half using a full register
12344 // reverse.
12345 // Swap the halves and concatenate them.
12346 // Slide the concatenated result by (VLMax - VL).
12347 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12348 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
12349 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
12350
12351 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12352 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12353
12354 // Reassemble the low and high pieces reversed.
12355 // NOTE: this Result is unmasked (because we do not need masks for
12356 // shuffles). If in the future this has to change, we can use a SELECT_VL
12357 // between Result and UNDEF using the mask originally passed to VP_REVERSE
12358 SDValue Result =
12359 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
12360
12361 // Slide off any elements from past EVL that were reversed into the low
12362 // elements.
12363 unsigned MinElts = GatherVT.getVectorMinNumElements();
12364 SDValue VLMax =
12365 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
12366 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
12367
12368 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
12369 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12370
12371 if (IsMaskVector) {
12372 // Truncate Result back to a mask vector
12373 Result =
12374 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
12375 {Result, DAG.getConstant(0, DL, GatherVT),
12376 DAG.getCondCode(ISD::SETNE),
12377 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12378 }
12379
12380 if (!VT.isFixedLengthVector())
12381 return Result;
12382 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12383 }
12384
12385 // Just promote the int type to i16 which will double the LMUL.
12386 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
12387 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12388 }
12389
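// Reverse the first EVL elements with a gather, e.g. for EVL = 4 the indices
// are (EVL-1) - vid = {3,2,1,0}.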
12390 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
12391 SDValue VecLen =
12392 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
12393 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12394 DAG.getUNDEF(IndicesVT), VecLen, EVL);
12395 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
12396 DAG.getUNDEF(IndicesVT), Mask, EVL);
12397 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
12398 DAG.getUNDEF(GatherVT), Mask, EVL);
12399
12400 if (IsMaskVector) {
12401 // Truncate Result back to a mask vector
12402 Result = DAG.getNode(
12403 RISCVISD::SETCC_VL, DL, ContainerVT,
12404 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
12405 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12406 }
12407
12408 if (!VT.isFixedLengthVector())
12409 return Result;
12410 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12411}
12412
12413SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
12414 SelectionDAG &DAG) const {
12415 MVT VT = Op.getSimpleValueType();
12416 if (VT.getVectorElementType() != MVT::i1)
12417 return lowerVPOp(Op, DAG);
12418
12419 // It is safe to drop mask parameter as masked-off elements are undef.
12420 SDValue Op1 = Op->getOperand(0);
12421 SDValue Op2 = Op->getOperand(1);
12422 SDValue VL = Op->getOperand(3);
12423
12424 MVT ContainerVT = VT;
12425 const bool IsFixed = VT.isFixedLengthVector();
12426 if (IsFixed) {
12427 ContainerVT = getContainerForFixedLengthVector(VT);
12428 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12429 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12430 }
12431
12432 SDLoc DL(Op);
12433 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
12434 if (!IsFixed)
12435 return Val;
12436 return convertFromScalableVector(VT, Val, DAG, Subtarget);
12437}
12438
12439SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
12440 SelectionDAG &DAG) const {
12441 SDLoc DL(Op);
12442 MVT XLenVT = Subtarget.getXLenVT();
12443 MVT VT = Op.getSimpleValueType();
12444 MVT ContainerVT = VT;
12445 if (VT.isFixedLengthVector())
12446 ContainerVT = getContainerForFixedLengthVector(VT);
12447
12448 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12449
12450 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12451 // Check if the mask is known to be all ones
12452 SDValue Mask = VPNode->getMask();
12453 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12454
12455 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12456 : Intrinsic::riscv_vlse_mask,
12457 DL, XLenVT);
12458 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12459 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12460 VPNode->getStride()};
12461 if (!IsUnmasked) {
12462 if (VT.isFixedLengthVector()) {
12463 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12464 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12465 }
12466 Ops.push_back(Mask);
12467 }
12468 Ops.push_back(VPNode->getVectorLength());
12469 if (!IsUnmasked) {
12470 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
12471 Ops.push_back(Policy);
12472 }
12473
12474 SDValue Result =
12475 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12476 VPNode->getMemoryVT(), VPNode->getMemOperand());
12477 SDValue Chain = Result.getValue(1);
12478
12479 if (VT.isFixedLengthVector())
12480 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12481
12482 return DAG.getMergeValues({Result, Chain}, DL);
12483}
12484
12485SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
12486 SelectionDAG &DAG) const {
12487 SDLoc DL(Op);
12488 MVT XLenVT = Subtarget.getXLenVT();
12489
12490 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12491 SDValue StoreVal = VPNode->getValue();
12492 MVT VT = StoreVal.getSimpleValueType();
12493 MVT ContainerVT = VT;
12494 if (VT.isFixedLengthVector()) {
12495 ContainerVT = getContainerForFixedLengthVector(VT);
12496 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12497 }
12498
12499 // Check if the mask is known to be all ones
12500 SDValue Mask = VPNode->getMask();
12501 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12502
12503 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12504 : Intrinsic::riscv_vsse_mask,
12505 DL, XLenVT);
12506 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12507 VPNode->getBasePtr(), VPNode->getStride()};
12508 if (!IsUnmasked) {
12509 if (VT.isFixedLengthVector()) {
12510 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12511 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12512 }
12513 Ops.push_back(Mask);
12514 }
12515 Ops.push_back(VPNode->getVectorLength());
12516
12517 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
12518 Ops, VPNode->getMemoryVT(),
12519 VPNode->getMemOperand());
12520}
12521
12522// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12523 // matched to an RVV indexed load. The RVV indexed load instructions only
12524// support the "unsigned unscaled" addressing mode; indices are implicitly
12525// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12526// signed or scaled indexing is extended to the XLEN value type and scaled
12527// accordingly.
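// For example (illustrative), gathering i32 elements at element indices
// {0,2,1,3} requires byte-offset indices {0,8,4,12}, since the vluxei
// intrinsic used below treats each index as an unsigned byte offset from the
// base pointer.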
12528SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
12529 SelectionDAG &DAG) const {
12530 SDLoc DL(Op);
12531 MVT VT = Op.getSimpleValueType();
12532
12533 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12534 EVT MemVT = MemSD->getMemoryVT();
12535 MachineMemOperand *MMO = MemSD->getMemOperand();
12536 SDValue Chain = MemSD->getChain();
12537 SDValue BasePtr = MemSD->getBasePtr();
12538
12539 [[maybe_unused]] ISD::LoadExtType LoadExtType;
12540 SDValue Index, Mask, PassThru, VL;
12541
12542 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12543 Index = VPGN->getIndex();
12544 Mask = VPGN->getMask();
12545 PassThru = DAG.getUNDEF(VT);
12546 VL = VPGN->getVectorLength();
12547 // VP doesn't support extending loads.
12548 LoadExtType = ISD::NON_EXTLOAD;
12549 } else {
12550 // Else it must be a MGATHER.
12551 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12552 Index = MGN->getIndex();
12553 Mask = MGN->getMask();
12554 PassThru = MGN->getPassThru();
12555 LoadExtType = MGN->getExtensionType();
12556 }
12557
12558 MVT IndexVT = Index.getSimpleValueType();
12559 MVT XLenVT = Subtarget.getXLenVT();
12560
12562 "Unexpected VTs!");
12563 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12564 // Targets have to explicitly opt-in for extending vector loads.
12565 assert(LoadExtType == ISD::NON_EXTLOAD &&
12566 "Unexpected extending MGATHER/VP_GATHER");
12567
12568 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12569 // the selection of the masked intrinsics doesn't do this for us.
12570 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12571
12572 MVT ContainerVT = VT;
12573 if (VT.isFixedLengthVector()) {
12574 ContainerVT = getContainerForFixedLengthVector(VT);
12575 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12576 ContainerVT.getVectorElementCount());
12577
12578 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12579
12580 if (!IsUnmasked) {
12581 MVT MaskVT = getMaskTypeFor(ContainerVT);
12582 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12583 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12584 }
12585 }
12586
12587 if (!VL)
12588 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12589
12590 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12591 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12592 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12593 }
12594
12595 unsigned IntID =
12596 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12597 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12598 if (IsUnmasked)
12599 Ops.push_back(DAG.getUNDEF(ContainerVT));
12600 else
12601 Ops.push_back(PassThru);
12602 Ops.push_back(BasePtr);
12603 Ops.push_back(Index);
12604 if (!IsUnmasked)
12605 Ops.push_back(Mask);
12606 Ops.push_back(VL);
12607 if (!IsUnmasked)
12608 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
12609 
12610 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12611 SDValue Result =
12612 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12613 Chain = Result.getValue(1);
12614
12615 if (VT.isFixedLengthVector())
12616 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12617
12618 return DAG.getMergeValues({Result, Chain}, DL);
12619}
12620
12621// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
12622// matched to a RVV indexed store. The RVV indexed store instructions only
12623// support the "unsigned unscaled" addressing mode; indices are implicitly
12624// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12625// signed or scaled indexing is extended to the XLEN value type and scaled
12626// accordingly.
12627SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
12628 SelectionDAG &DAG) const {
12629 SDLoc DL(Op);
12630 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12631 EVT MemVT = MemSD->getMemoryVT();
12632 MachineMemOperand *MMO = MemSD->getMemOperand();
12633 SDValue Chain = MemSD->getChain();
12634 SDValue BasePtr = MemSD->getBasePtr();
12635
12636 [[maybe_unused]] bool IsTruncatingStore = false;
12637 SDValue Index, Mask, Val, VL;
12638
12639 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
12640 Index = VPSN->getIndex();
12641 Mask = VPSN->getMask();
12642 Val = VPSN->getValue();
12643 VL = VPSN->getVectorLength();
12644 // VP doesn't support truncating stores.
12645 IsTruncatingStore = false;
12646 } else {
12647 // Else it must be a MSCATTER.
12648 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12649 Index = MSN->getIndex();
12650 Mask = MSN->getMask();
12651 Val = MSN->getValue();
12652 IsTruncatingStore = MSN->isTruncatingStore();
12653 }
12654
12655 MVT VT = Val.getSimpleValueType();
12656 MVT IndexVT = Index.getSimpleValueType();
12657 MVT XLenVT = Subtarget.getXLenVT();
12658
12660 "Unexpected VTs!");
12661 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12662 // Targets have to explicitly opt-in for extending vector loads and
12663 // truncating vector stores.
12664 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
12665
12666 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12667 // the selection of the masked intrinsics doesn't do this for us.
12668 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12669
12670 MVT ContainerVT = VT;
12671 if (VT.isFixedLengthVector()) {
12672 ContainerVT = getContainerForFixedLengthVector(VT);
12673 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12674 ContainerVT.getVectorElementCount());
12675
12676 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12677 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12678
12679 if (!IsUnmasked) {
12680 MVT MaskVT = getMaskTypeFor(ContainerVT);
12681 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12682 }
12683 }
12684
12685 if (!VL)
12686 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12687
12688 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12689 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12690 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12691 }
12692
12693 unsigned IntID =
12694 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12695 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12696 Ops.push_back(Val);
12697 Ops.push_back(BasePtr);
12698 Ops.push_back(Index);
12699 if (!IsUnmasked)
12700 Ops.push_back(Mask);
12701 Ops.push_back(VL);
12702
12703 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
12704 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12705}
12706
12707SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
12708 SelectionDAG &DAG) const {
12709 const MVT XLenVT = Subtarget.getXLenVT();
12710 SDLoc DL(Op);
12711 SDValue Chain = Op->getOperand(0);
12712 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12713 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12714 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
12715
12716 // Encoding used for rounding mode in RISC-V differs from that used in
12717 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into
12718 // a table, which consists of a sequence of 4-bit fields, each representing the
12719 // corresponding FLT_ROUNDS mode.
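// Worked example: reading frm == 1 (RTZ) shifts the table right by 1*4 bits
// and masks with 7, producing int(RoundingMode::TowardZero) == 0, which is the
// FLT_ROUNDS value for "round toward zero".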
12720 static const int Table =
12721 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
12722 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
12723 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
12724 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
12725 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
12726 
12727 SDValue Shift =
12728 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
12729 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12730 DAG.getConstant(Table, DL, XLenVT), Shift);
12731 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12732 DAG.getConstant(7, DL, XLenVT));
12733
12734 return DAG.getMergeValues({Masked, Chain}, DL);
12735}
12736
12737SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
12738 SelectionDAG &DAG) const {
12739 const MVT XLenVT = Subtarget.getXLenVT();
12740 SDLoc DL(Op);
12741 SDValue Chain = Op->getOperand(0);
12742 SDValue RMValue = Op->getOperand(1);
12743 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12744
12745 // Encoding used for rounding mode in RISC-V differs from that used in
12746 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
12747 // a table, which consists of a sequence of 4-bit fields, each representing the
12748 // corresponding RISC-V mode.
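// Worked example: FLT_ROUNDS mode 1 (nearest, ties to even) selects bits
// [7:4] of the table, which hold RISCVFPRndMode::RNE (0), the value that is
// then written to the frm CSR.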
12749 static const unsigned Table =
12750 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
12751 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
12752 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
12753 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
12754 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
12755 
12756 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12757
12758 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12759 DAG.getConstant(2, DL, XLenVT));
12760 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12761 DAG.getConstant(Table, DL, XLenVT), Shift);
12762 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12763 DAG.getConstant(0x7, DL, XLenVT));
12764 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12765 RMValue);
12766}
12767
12768SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12769 SelectionDAG &DAG) const {
12770 MachineFunction &MF = DAG.getMachineFunction();
12771 
12772 bool isRISCV64 = Subtarget.is64Bit();
12773 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12774
12775 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12776 return DAG.getFrameIndex(FI, PtrVT);
12777}
12778
12779// Returns the opcode of the target-specific SDNode that implements the 32-bit
12780// form of the given Opcode.
12781static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12782 switch (Opcode) {
12783 default:
12784 llvm_unreachable("Unexpected opcode");
12785 case ISD::SHL:
12786 return RISCVISD::SLLW;
12787 case ISD::SRA:
12788 return RISCVISD::SRAW;
12789 case ISD::SRL:
12790 return RISCVISD::SRLW;
12791 case ISD::SDIV:
12792 return RISCVISD::DIVW;
12793 case ISD::UDIV:
12794 return RISCVISD::DIVUW;
12795 case ISD::UREM:
12796 return RISCVISD::REMUW;
12797 case ISD::ROTL:
12798 return RISCVISD::ROLW;
12799 case ISD::ROTR:
12800 return RISCVISD::RORW;
12801 }
12802}
12803
12804// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12805// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12806// otherwise be promoted to i64, making it difficult to select the
12807 // SLLW/DIVUW/.../*W later on because the fact that the operation was originally of
12808// type i8/i16/i32 is lost.
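// For example, an i32 ISD::SRL on RV64 becomes
//   (trunc i32 (RISCVISD::SRLW (any_extend i64 x), (any_extend i64 y)))
// so isel can still select srlw even though the operands were promoted to i64.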
12809 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12810 unsigned ExtOpc = ISD::ANY_EXTEND) {
12811 SDLoc DL(N);
12812 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12813 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12814 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12815 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12816 // ReplaceNodeResults requires we maintain the same type for the return value.
12817 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12818}
12819
12820 // Converts the given 32-bit operation to an i64 operation with sign-extension
12821 // semantics to reduce the number of sign-extension instructions.
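// For example, an i32 ISD::ADD becomes
//   (trunc i32 (sext_inreg (add (any_extend i64 x), (any_extend i64 y)), i32))
// which matches the RV64 addw pattern and keeps the result known sign-extended.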
12822 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12823 SDLoc DL(N);
12824 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12825 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12826 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12827 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12828 DAG.getValueType(MVT::i32));
12829 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12830}
12831
12832 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12833 SmallVectorImpl<SDValue> &Results,
12834 SelectionDAG &DAG) const {
12835 SDLoc DL(N);
12836 switch (N->getOpcode()) {
12837 default:
12838 llvm_unreachable("Don't know how to custom type legalize this operation!");
12839 case ISD::STRICT_FP_TO_SINT:
12840 case ISD::STRICT_FP_TO_UINT:
12841 case ISD::FP_TO_SINT:
12842 case ISD::FP_TO_UINT: {
12843 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12844 "Unexpected custom legalisation");
12845 bool IsStrict = N->isStrictFPOpcode();
12846 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12847 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12848 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12849 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12850 TargetLowering::TypeSoftenFloat) {
12851 if (!isTypeLegal(Op0.getValueType()))
12852 return;
12853 if (IsStrict) {
12854 SDValue Chain = N->getOperand(0);
12855 // In the absence of Zfh, promote f16 to f32, then convert.
12856 if (Op0.getValueType() == MVT::f16 &&
12857 !Subtarget.hasStdExtZfhOrZhinx()) {
12858 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12859 {Chain, Op0});
12860 Chain = Op0.getValue(1);
12861 }
12862 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12863 : RISCVISD::STRICT_FCVT_WU_RV64;
12864 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12865 SDValue Res = DAG.getNode(
12866 Opc, DL, VTs, Chain, Op0,
12867 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12868 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12869 Results.push_back(Res.getValue(1));
12870 return;
12871 }
12872 // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
12873 // convert.
12874 if ((Op0.getValueType() == MVT::f16 &&
12875 !Subtarget.hasStdExtZfhOrZhinx()) ||
12876 Op0.getValueType() == MVT::bf16)
12877 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12878
12879 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12880 SDValue Res =
12881 DAG.getNode(Opc, DL, MVT::i64, Op0,
12882 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12883 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12884 return;
12885 }
12886 // If the FP type needs to be softened, emit a library call using the 'si'
12887 // version. If we left it to default legalization we'd end up with 'di'. If
12888 // the FP type doesn't need to be softened just let generic type
12889 // legalization promote the result type.
12890 RTLIB::Libcall LC;
12891 if (IsSigned)
12892 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12893 else
12894 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12895 MakeLibCallOptions CallOptions;
12896 EVT OpVT = Op0.getValueType();
12897 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12898 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12899 SDValue Result;
12900 std::tie(Result, Chain) =
12901 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12902 Results.push_back(Result);
12903 if (IsStrict)
12904 Results.push_back(Chain);
12905 break;
12906 }
12907 case ISD::LROUND: {
12908 SDValue Op0 = N->getOperand(0);
12909 EVT Op0VT = Op0.getValueType();
12910 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12911 TargetLowering::TypeSoftenFloat) {
12912 if (!isTypeLegal(Op0VT))
12913 return;
12914
12915 // In the absence of Zfh, promote f16 to f32, then convert.
12916 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12917 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12918
12919 SDValue Res =
12920 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12921 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12922 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12923 return;
12924 }
12925 // If the FP type needs to be softened, emit a library call to lround. We'll
12926 // need to truncate the result. We assume any value that doesn't fit in i32
12927 // is allowed to return an unspecified value.
12928 RTLIB::Libcall LC =
12929 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12930 MakeLibCallOptions CallOptions;
12931 EVT OpVT = Op0.getValueType();
12932 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12933 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12934 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12935 Results.push_back(Result);
12936 break;
12937 }
12938 case ISD::READCYCLECOUNTER:
12939 case ISD::READSTEADYCOUNTER: {
12940 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
12941 "has custom type legalization on riscv32");
12942
12943 SDValue LoCounter, HiCounter;
12944 MVT XLenVT = Subtarget.getXLenVT();
12945 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
12946 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
12947 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
12948 } else {
12949 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
12950 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
12951 }
12952 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12953 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
12954 N->getOperand(0), LoCounter, HiCounter);
12955
12956 Results.push_back(
12957 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
12958 Results.push_back(RCW.getValue(2));
12959 break;
12960 }
12961 case ISD::LOAD: {
12962 if (!ISD::isNON_EXTLoad(N))
12963 return;
12964
12965 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
12966 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
12967 LoadSDNode *Ld = cast<LoadSDNode>(N);
12968
12969 SDLoc dl(N);
12970 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
12971 Ld->getBasePtr(), Ld->getMemoryVT(),
12972 Ld->getMemOperand());
12973 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
12974 Results.push_back(Res.getValue(1));
12975 return;
12976 }
12977 case ISD::MUL: {
12978 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
12979 unsigned XLen = Subtarget.getXLen();
12980 // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
12981 if (Size > XLen) {
12982 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
12983 SDValue LHS = N->getOperand(0);
12984 SDValue RHS = N->getOperand(1);
12985 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
12986
12987 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
12988 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
12989 // We need exactly one side to be unsigned.
12990 if (LHSIsU == RHSIsU)
12991 return;
12992
12993 auto MakeMULPair = [&](SDValue S, SDValue U) {
12994 MVT XLenVT = Subtarget.getXLenVT();
12995 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
12996 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
12997 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
12998 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
12999 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
13000 };
13001
13002 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
13003 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
13004
13005 // The other operand should be signed, but still prefer MULH when
13006 // possible.
13007 if (RHSIsU && LHSIsS && !RHSIsS)
13008 Results.push_back(MakeMULPair(LHS, RHS));
13009 else if (LHSIsU && RHSIsS && !LHSIsS)
13010 Results.push_back(MakeMULPair(RHS, LHS));
13011
13012 return;
13013 }
13014 [[fallthrough]];
13015 }
13016 case ISD::ADD:
13017 case ISD::SUB:
13018 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13019 "Unexpected custom legalisation");
13020 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
13021 break;
13022 case ISD::SHL:
13023 case ISD::SRA:
13024 case ISD::SRL:
13025 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13026 "Unexpected custom legalisation");
13027 if (N->getOperand(1).getOpcode() != ISD::Constant) {
13028 // If we can use a BSET instruction, allow default promotion to apply.
13029 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
13030 isOneConstant(N->getOperand(0)))
13031 break;
13032 Results.push_back(customLegalizeToWOp(N, DAG));
13033 break;
13034 }
13035
13036 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
13037 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
13038 // shift amount.
13039 if (N->getOpcode() == ISD::SHL) {
13040 SDLoc DL(N);
13041 SDValue NewOp0 =
13042 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13043 SDValue NewOp1 =
13044 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
13045 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
13046 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
13047 DAG.getValueType(MVT::i32));
13048 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13049 }
13050
13051 break;
13052 case ISD::ROTL:
13053 case ISD::ROTR:
13054 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13055 "Unexpected custom legalisation");
13056 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13057 Subtarget.hasVendorXTHeadBb()) &&
13058 "Unexpected custom legalization");
13059 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
13060 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13061 return;
13062 Results.push_back(customLegalizeToWOp(N, DAG));
13063 break;
13064 case ISD::CTTZ:
13065 case ISD::CTTZ_ZERO_UNDEF:
13066 case ISD::CTLZ:
13067 case ISD::CTLZ_ZERO_UNDEF: {
13068 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13069 "Unexpected custom legalisation");
13070
13071 SDValue NewOp0 =
13072 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13073 bool IsCTZ =
13074 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
13075 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
13076 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
13077 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13078 return;
13079 }
13080 case ISD::SDIV:
13081 case ISD::UDIV:
13082 case ISD::UREM: {
13083 MVT VT = N->getSimpleValueType(0);
13084 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13085 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
13086 "Unexpected custom legalisation");
13087 // Don't promote division/remainder by constant since we should expand those
13088 // to multiply by magic constant.
13089 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
13090 if (N->getOperand(1).getOpcode() == ISD::Constant &&
13091 !isIntDivCheap(N->getValueType(0), Attr))
13092 return;
13093
13094 // If the input is i32, use ANY_EXTEND since the W instructions don't read
13095 // the upper 32 bits. For other types we need to sign or zero extend
13096 // based on the opcode.
13097 unsigned ExtOpc = ISD::ANY_EXTEND;
13098 if (VT != MVT::i32)
13099 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
13100 : ISD::ZERO_EXTEND;
13101 
13102 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
13103 break;
13104 }
13105 case ISD::SADDO: {
13106 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13107 "Unexpected custom legalisation");
13108
13109 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
13110 // use the default legalization.
13111 if (!isa<ConstantSDNode>(N->getOperand(1)))
13112 return;
13113
13114 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13115 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
13116 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
13117 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13118 DAG.getValueType(MVT::i32));
13119
13120 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13121
13122 // For an addition, the result should be less than one of the operands (LHS)
13123 // if and only if the other operand (RHS) is negative, otherwise there will
13124 // be overflow.
13125 // For a subtraction, the result should be less than one of the operands
13126 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
13127 // otherwise there will be overflow.
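// Worked example: for i32 LHS = 1 and RHS = INT32_MAX, the 32-bit sum wraps to
// INT32_MIN, so (Res < LHS) is true while (RHS < 0) is false; the XOR of the
// two conditions below therefore reports the overflow.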
13128 EVT OType = N->getValueType(1);
13129 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
13130 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
13131
13132 SDValue Overflow =
13133 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
13134 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13135 Results.push_back(Overflow);
13136 return;
13137 }
13138 case ISD::UADDO:
13139 case ISD::USUBO: {
13140 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13141 "Unexpected custom legalisation");
13142 bool IsAdd = N->getOpcode() == ISD::UADDO;
13143 // Create an ADDW or SUBW.
13144 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13145 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13146 SDValue Res =
13147 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
13148 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13149 DAG.getValueType(MVT::i32));
13150
13151 SDValue Overflow;
13152 if (IsAdd && isOneConstant(RHS)) {
13153 // Special case uaddo X, 1 overflowed if the addition result is 0.
13154 // The general case (X + C) < C is not necessarily beneficial. Although we
13155 // reduce the live range of X, we may introduce the materialization of
13156 // constant C, especially when the setcc result is used by a branch, since
13157 // RISC-V has no compare-with-constant branch instructions.
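// For example, uaddo(X, 1) overflows only when X == UINT32_MAX, which is
// exactly when the sign-extended ADDW result is 0, so a single compare with 0
// suffices.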
13158 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
13159 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
13160 } else if (IsAdd && isAllOnesConstant(RHS)) {
13161 // Special case uaddo X, -1 overflowed if X != 0.
13162 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
13163 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
13164 } else {
13165 // Sign extend the LHS and perform an unsigned compare with the ADDW
13166 // result. Since the inputs are sign extended from i32, this is equivalent
13167 // to comparing the lower 32 bits.
13168 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13169 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
13170 IsAdd ? ISD::SETULT : ISD::SETUGT);
13171 }
13172
13173 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13174 Results.push_back(Overflow);
13175 return;
13176 }
13177 case ISD::UADDSAT:
13178 case ISD::USUBSAT: {
13179 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13180 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
13181 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
13182 // promotion for UADDO/USUBO.
13183 Results.push_back(expandAddSubSat(N, DAG));
13184 return;
13185 }
13186 case ISD::SADDSAT:
13187 case ISD::SSUBSAT: {
13188 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13189 "Unexpected custom legalisation");
13190 Results.push_back(expandAddSubSat(N, DAG));
13191 return;
13192 }
13193 case ISD::ABS: {
13194 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13195 "Unexpected custom legalisation");
13196
13197 if (Subtarget.hasStdExtZbb()) {
13198 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
13199 // This allows us to remember that the result is sign extended. Expanding
13200 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
13201 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
13202 N->getOperand(0));
13203 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
13204 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
13205 return;
13206 }
13207
13208 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
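// Worked example: X = -5 gives Y = -1, xor(X, Y) = 4, and 4 - (-1) = 5; for a
// non-negative X, Y is 0 and the value is returned unchanged.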
13209 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13210
13211 // Freeze the source so we can increase its use count.
13212 Src = DAG.getFreeze(Src);
13213
13214 // Copy sign bit to all bits using the sraiw pattern.
13215 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
13216 DAG.getValueType(MVT::i32));
13217 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
13218 DAG.getConstant(31, DL, MVT::i64));
13219
13220 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
13221 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
13222
13223 // NOTE: The result is only required to be anyextended, but sext is
13224 // consistent with type legalization of sub.
13225 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
13226 DAG.getValueType(MVT::i32));
13227 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13228 return;
13229 }
13230 case ISD::BITCAST: {
13231 EVT VT = N->getValueType(0);
13232 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
13233 SDValue Op0 = N->getOperand(0);
13234 EVT Op0VT = Op0.getValueType();
13235 MVT XLenVT = Subtarget.getXLenVT();
13236 if (VT == MVT::i16 &&
13237 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
13238 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13239 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
13240 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
13241 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
13242 Subtarget.hasStdExtFOrZfinx()) {
13243 SDValue FPConv =
13244 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
13245 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
13246 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
13247 Subtarget.hasStdExtDOrZdinx()) {
13248 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
13249 DAG.getVTList(MVT::i32, MVT::i32), Op0);
13250 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
13251 NewReg.getValue(0), NewReg.getValue(1));
13252 Results.push_back(RetReg);
13253 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
13254 isTypeLegal(Op0VT)) {
13255 // Custom-legalize bitcasts from fixed-length vector types to illegal
13256 // scalar types in order to improve codegen. Bitcast the vector to a
13257 // one-element vector type whose element type is the same as the result
13258 // type, and extract the first element.
13259 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
13260 if (isTypeLegal(BVT)) {
13261 SDValue BVec = DAG.getBitcast(BVT, Op0);
13262 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
13263 DAG.getVectorIdxConstant(0, DL)));
13264 }
13265 }
13266 break;
13267 }
13268 case RISCVISD::BREV8:
13269 case RISCVISD::ORC_B: {
13270 MVT VT = N->getSimpleValueType(0);
13271 MVT XLenVT = Subtarget.getXLenVT();
13272 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
13273 "Unexpected custom legalisation");
13274 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
13275 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
13276 "Unexpected extension");
13277 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
13278 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
13279 // ReplaceNodeResults requires we maintain the same type for the return
13280 // value.
13281 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
13282 break;
13283 }
13284 case ISD::EXTRACT_VECTOR_ELT: {
13285 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
13286 // type is illegal (currently only vXi64 RV32).
13287 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
13288 // transferred to the destination register. We issue two of these from the
13289 // upper- and lower- halves of the SEW-bit vector element, slid down to the
13290 // first element.
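// For example, extracting element 2 of a v4i64 on RV32 slides the vector down
// by 2, reads the low 32 bits with vmv.x.s, shifts the element right by 32,
// reads the high 32 bits with a second vmv.x.s, and rebuilds the i64 with
// BUILD_PAIR.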
13291 SDValue Vec = N->getOperand(0);
13292 SDValue Idx = N->getOperand(1);
13293
13294 // The vector type hasn't been legalized yet so we can't issue target
13295 // specific nodes if it needs legalization.
13296 // FIXME: We would manually legalize if it's important.
13297 if (!isTypeLegal(Vec.getValueType()))
13298 return;
13299
13300 MVT VecVT = Vec.getSimpleValueType();
13301
13302 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
13303 VecVT.getVectorElementType() == MVT::i64 &&
13304 "Unexpected EXTRACT_VECTOR_ELT legalization");
13305
13306 // If this is a fixed vector, we need to convert it to a scalable vector.
13307 MVT ContainerVT = VecVT;
13308 if (VecVT.isFixedLengthVector()) {
13309 ContainerVT = getContainerForFixedLengthVector(VecVT);
13310 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
13311 }
13312
13313 MVT XLenVT = Subtarget.getXLenVT();
13314
13315 // Use a VL of 1 to avoid processing more elements than we need.
13316 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
13317
13318 // Unless the index is known to be 0, we must slide the vector down to get
13319 // the desired element into index 0.
13320 if (!isNullConstant(Idx)) {
13321 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13322 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
13323 }
13324
13325 // Extract the lower XLEN bits of the correct vector element.
13326 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13327
13328 // To extract the upper XLEN bits of the vector element, shift the first
13329 // element right by 32 bits and re-extract the lower XLEN bits.
13330 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13331 DAG.getUNDEF(ContainerVT),
13332 DAG.getConstant(32, DL, XLenVT), VL);
13333 SDValue LShr32 =
13334 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
13335 DAG.getUNDEF(ContainerVT), Mask, VL);
13336
13337 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13338
13339 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13340 break;
13341 }
13342 case ISD::INTRINSIC_WO_CHAIN: {
13343 unsigned IntNo = N->getConstantOperandVal(0);
13344 switch (IntNo) {
13345 default:
13347 "Don't know how to custom type legalize this intrinsic!");
13348 case Intrinsic::experimental_get_vector_length: {
13349 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
13350 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13351 return;
13352 }
13353 case Intrinsic::experimental_cttz_elts: {
13354 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
13355 Results.push_back(
13356 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
13357 return;
13358 }
13359 case Intrinsic::riscv_orc_b:
13360 case Intrinsic::riscv_brev8:
13361 case Intrinsic::riscv_sha256sig0:
13362 case Intrinsic::riscv_sha256sig1:
13363 case Intrinsic::riscv_sha256sum0:
13364 case Intrinsic::riscv_sha256sum1:
13365 case Intrinsic::riscv_sm3p0:
13366 case Intrinsic::riscv_sm3p1: {
13367 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13368 return;
13369 unsigned Opc;
13370 switch (IntNo) {
13371 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
13372 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
13373 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
13374 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
13375 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
13376 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
13377 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
13378 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
13379 }
13380
13381 SDValue NewOp =
13382 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13383 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
13384 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13385 return;
13386 }
13387 case Intrinsic::riscv_sm4ks:
13388 case Intrinsic::riscv_sm4ed: {
13389 unsigned Opc =
13390 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
13391 SDValue NewOp0 =
13392 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13393 SDValue NewOp1 =
13394 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13395 SDValue Res =
13396 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
13397 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13398 return;
13399 }
13400 case Intrinsic::riscv_mopr: {
13401 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13402 return;
13403 SDValue NewOp =
13404 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13405 SDValue Res = DAG.getNode(
13406 RISCVISD::MOPR, DL, MVT::i64, NewOp,
13407 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
13408 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13409 return;
13410 }
13411 case Intrinsic::riscv_moprr: {
13412 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13413 return;
13414 SDValue NewOp0 =
13415 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13416 SDValue NewOp1 =
13417 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13418 SDValue Res = DAG.getNode(
13419 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
13420 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
13421 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13422 return;
13423 }
13424 case Intrinsic::riscv_clmul: {
13425 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13426 return;
13427
13428 SDValue NewOp0 =
13429 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13430 SDValue NewOp1 =
13431 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13432 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
13433 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13434 return;
13435 }
13436 case Intrinsic::riscv_clmulh:
13437 case Intrinsic::riscv_clmulr: {
13438 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13439 return;
13440
13441 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
13442 // to the full 128-bit clmul result of multiplying two xlen values.
13443 // Perform clmulr or clmulh on the shifted values. Finally, extract the
13444 // upper 32 bits.
13445 //
13446 // The alternative is to mask the inputs to 32 bits and use clmul, but
13447 // that requires two shifts to mask each input without zext.w.
13448 // FIXME: If the inputs are known zero extended or could be freely
13449 // zero extended, the mask form would be better.
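// For example, for i32 clmulh(a, b): the carry-less product of (a << 32) and
// (b << 32) is clmul(a, b) << 64, so the XLEN-wide clmulh of the shifted
// inputs returns clmul(a, b); shifting that right by 32 leaves exactly the
// i32 clmulh result.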
13450 SDValue NewOp0 =
13451 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13452 SDValue NewOp1 =
13453 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13454 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
13455 DAG.getConstant(32, DL, MVT::i64));
13456 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
13457 DAG.getConstant(32, DL, MVT::i64));
13458 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
13459 : RISCVISD::CLMULR;
13460 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
13461 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
13462 DAG.getConstant(32, DL, MVT::i64));
13463 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13464 return;
13465 }
13466 case Intrinsic::riscv_vmv_x_s: {
13467 EVT VT = N->getValueType(0);
13468 MVT XLenVT = Subtarget.getXLenVT();
13469 if (VT.bitsLT(XLenVT)) {
13470 // Simple case just extract using vmv.x.s and truncate.
13471 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
13472 Subtarget.getXLenVT(), N->getOperand(1));
13473 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
13474 return;
13475 }
13476
13477 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
13478 "Unexpected custom legalization");
13479
13480 // We need to do the move in two steps.
13481 SDValue Vec = N->getOperand(1);
13482 MVT VecVT = Vec.getSimpleValueType();
13483
13484 // First extract the lower XLEN bits of the element.
13485 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13486
13487 // To extract the upper XLEN bits of the vector element, shift the first
13488 // element right by 32 bits and re-extract the lower XLEN bits.
13489 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
13490
13491 SDValue ThirtyTwoV =
13492 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
13493 DAG.getConstant(32, DL, XLenVT), VL);
13494 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
13495 DAG.getUNDEF(VecVT), Mask, VL);
13496 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13497
13498 Results.push_back(
13499 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13500 break;
13501 }
13502 }
13503 break;
13504 }
13505 case ISD::VECREDUCE_ADD:
13506 case ISD::VECREDUCE_AND:
13507 case ISD::VECREDUCE_OR:
13508 case ISD::VECREDUCE_XOR:
13509 case ISD::VECREDUCE_SMAX:
13510 case ISD::VECREDUCE_UMAX:
13511 case ISD::VECREDUCE_SMIN:
13512 case ISD::VECREDUCE_UMIN:
13513 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
13514 Results.push_back(V);
13515 break;
13516 case ISD::VP_REDUCE_ADD:
13517 case ISD::VP_REDUCE_AND:
13518 case ISD::VP_REDUCE_OR:
13519 case ISD::VP_REDUCE_XOR:
13520 case ISD::VP_REDUCE_SMAX:
13521 case ISD::VP_REDUCE_UMAX:
13522 case ISD::VP_REDUCE_SMIN:
13523 case ISD::VP_REDUCE_UMIN:
13524 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
13525 Results.push_back(V);
13526 break;
13527 case ISD::GET_ROUNDING: {
13528 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
13529 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
13530 Results.push_back(Res.getValue(0));
13531 Results.push_back(Res.getValue(1));
13532 break;
13533 }
13534 }
13535}
13536
13537/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13538/// which corresponds to it.
13539static unsigned getVecReduceOpcode(unsigned Opc) {
13540 switch (Opc) {
13541 default:
13542 llvm_unreachable("Unhandled binary to transform reduction");
13543 case ISD::ADD:
13544 return ISD::VECREDUCE_ADD;
13545 case ISD::UMAX:
13546 return ISD::VECREDUCE_UMAX;
13547 case ISD::SMAX:
13548 return ISD::VECREDUCE_SMAX;
13549 case ISD::UMIN:
13550 return ISD::VECREDUCE_UMIN;
13551 case ISD::SMIN:
13552 return ISD::VECREDUCE_SMIN;
13553 case ISD::AND:
13554 return ISD::VECREDUCE_AND;
13555 case ISD::OR:
13556 return ISD::VECREDUCE_OR;
13557 case ISD::XOR:
13558 return ISD::VECREDUCE_XOR;
13559 case ISD::FADD:
13560 // Note: This is the associative form of the generic reduction opcode.
13561 return ISD::VECREDUCE_FADD;
13562 }
13563}
13564
13565/// Perform two related transforms whose purpose is to incrementally recognize
13566/// an explode_vector followed by scalar reduction as a vector reduction node.
13567/// This exists to recover from a deficiency in SLP which can't handle
13568/// forests with multiple roots sharing common nodes. In some cases, one
13569/// of the trees will be vectorized, and the other will remain (unprofitably)
13570/// scalarized.
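/// For example, (add (extract_elt v4i32 V, 0), (extract_elt V, 1)) becomes
/// (vecreduce_add (extract_subvector V, 0)) over a 2-element subvector, and a
/// later (add (vecreduce_add ...), (extract_elt V, 2)) grows that reduction to
/// cover three elements.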
13571static SDValue
13572 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
13573 const RISCVSubtarget &Subtarget) {
13574
13575 // This transform needs to run before all integer types have been legalized
13576 // to i64 (so that the vector element type matches the add type), and while
13577 // it's safe to introduce odd sized vector types.
13578 if (DAG.NewNodesMustHaveLegalTypes)
13579 return SDValue();
13580
13581 // Without V, this transform isn't useful. We could form the (illegal)
13582 // operations and let them be scalarized again, but there's really no point.
13583 if (!Subtarget.hasVInstructions())
13584 return SDValue();
13585
13586 const SDLoc DL(N);
13587 const EVT VT = N->getValueType(0);
13588 const unsigned Opc = N->getOpcode();
13589
13590 // For FADD, we only handle the case with reassociation allowed. We
13591 // could handle strict reduction order, but at the moment, there's no
13592 // known reason to, and the complexity isn't worth it.
13593 // TODO: Handle fminnum and fmaxnum here
13594 if (!VT.isInteger() &&
13595 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
13596 return SDValue();
13597
13598 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
13599 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
13600 "Inconsistent mappings");
13601 SDValue LHS = N->getOperand(0);
13602 SDValue RHS = N->getOperand(1);
13603
13604 if (!LHS.hasOneUse() || !RHS.hasOneUse())
13605 return SDValue();
13606
13607 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13608 std::swap(LHS, RHS);
13609
13610 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13611 !isa<ConstantSDNode>(RHS.getOperand(1)))
13612 return SDValue();
13613
13614 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
13615 SDValue SrcVec = RHS.getOperand(0);
13616 EVT SrcVecVT = SrcVec.getValueType();
13617 assert(SrcVecVT.getVectorElementType() == VT);
13618 if (SrcVecVT.isScalableVector())
13619 return SDValue();
13620
13621 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
13622 return SDValue();
13623
13624 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13625 // reduce_op (extract_subvector [2 x VT] from V). This will form the
13626 // root of our reduction tree. TODO: We could extend this to any two
13627 // adjacent aligned constant indices if desired.
13628 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13629 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13630 uint64_t LHSIdx =
13631 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13632 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13633 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
13634 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13635 DAG.getVectorIdxConstant(0, DL));
13636 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
13637 }
13638 }
13639
13640 // Match (binop (reduce (extract_subvector V, 0),
13641 // (extract_vector_elt V, sizeof(SubVec))))
13642 // into a reduction of one more element from the original vector V.
13643 if (LHS.getOpcode() != ReduceOpc)
13644 return SDValue();
13645
13646 SDValue ReduceVec = LHS.getOperand(0);
13647 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13648 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
13649 isNullConstant(ReduceVec.getOperand(1)) &&
13650 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13651 // For illegal types (e.g. 3xi32), most will be combined again into a
13652 // wider (hopefully legal) type. If this is a terminal state, we are
13653 // relying on type legalization here to produce something reasonable
13654 // and this lowering quality could probably be improved. (TODO)
13655 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13656 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13657 DAG.getVectorIdxConstant(0, DL));
13658 return DAG.getNode(ReduceOpc, DL, VT, Vec,
13659 ReduceVec->getFlags() & N->getFlags());
13660 }
13661
13662 return SDValue();
13663}
13664
13665
13666// Try to fold (<bop> x, (reduction.<bop> vec, start))
13667 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
13668 const RISCVSubtarget &Subtarget) {
13669 auto BinOpToRVVReduce = [](unsigned Opc) {
13670 switch (Opc) {
13671 default:
13672 llvm_unreachable("Unhandled binary to transform reduction");
13673 case ISD::ADD:
13674 return RISCVISD::VECREDUCE_ADD_VL;
13675 case ISD::UMAX:
13676 return RISCVISD::VECREDUCE_UMAX_VL;
13677 case ISD::SMAX:
13678 return RISCVISD::VECREDUCE_SMAX_VL;
13679 case ISD::UMIN:
13680 return RISCVISD::VECREDUCE_UMIN_VL;
13681 case ISD::SMIN:
13682 return RISCVISD::VECREDUCE_SMIN_VL;
13683 case ISD::AND:
13684 return RISCVISD::VECREDUCE_AND_VL;
13685 case ISD::OR:
13686 return RISCVISD::VECREDUCE_OR_VL;
13687 case ISD::XOR:
13688 return RISCVISD::VECREDUCE_XOR_VL;
13689 case ISD::FADD:
13690 return RISCVISD::VECREDUCE_FADD_VL;
13691 case ISD::FMAXNUM:
13692 return RISCVISD::VECREDUCE_FMAX_VL;
13693 case ISD::FMINNUM:
13694 return RISCVISD::VECREDUCE_FMIN_VL;
13695 }
13696 };
13697
13698 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
13699 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13700 isNullConstant(V.getOperand(1)) &&
13701 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13702 };
13703
13704 unsigned Opc = N->getOpcode();
13705 unsigned ReduceIdx;
13706 if (IsReduction(N->getOperand(0), Opc))
13707 ReduceIdx = 0;
13708 else if (IsReduction(N->getOperand(1), Opc))
13709 ReduceIdx = 1;
13710 else
13711 return SDValue();
13712
13713 // Skip if this is an FADD that disallows reassociation, which this combine needs.
13714 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
13715 return SDValue();
13716
13717 SDValue Extract = N->getOperand(ReduceIdx);
13718 SDValue Reduce = Extract.getOperand(0);
13719 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13720 return SDValue();
13721
13722 SDValue ScalarV = Reduce.getOperand(2);
13723 EVT ScalarVT = ScalarV.getValueType();
13724 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
13725 ScalarV.getOperand(0)->isUndef() &&
13726 isNullConstant(ScalarV.getOperand(2)))
13727 ScalarV = ScalarV.getOperand(1);
13728
13729 // Make sure that ScalarV is a splat with VL=1.
13730 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
13731 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
13732 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
13733 return SDValue();
13734
13735 if (!isNonZeroAVL(ScalarV.getOperand(2)))
13736 return SDValue();
13737
13738 // Check the scalar of ScalarV is neutral element
13739 // TODO: Deal with value other than neutral element.
13740 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13741 0))
13742 return SDValue();
13743
13744 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13745 // FIXME: We might be able to improve this if operand 0 is undef.
13746 if (!isNonZeroAVL(Reduce.getOperand(5)))
13747 return SDValue();
13748
13749 SDValue NewStart = N->getOperand(1 - ReduceIdx);
13750
13751 SDLoc DL(N);
13752 SDValue NewScalarV =
13753 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13754 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13755
13756 // If we looked through an INSERT_SUBVECTOR we need to restore it.
13757 if (ScalarVT != ScalarV.getValueType())
13758 NewScalarV =
13759 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13760 NewScalarV, DAG.getVectorIdxConstant(0, DL));
13761
13762 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13763 NewScalarV, Reduce.getOperand(3),
13764 Reduce.getOperand(4), Reduce.getOperand(5)};
13765 SDValue NewReduce =
13766 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13767 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13768 Extract.getOperand(1));
13769}
13770
13771// Optimize (add (shl x, c0), (shl y, c1)) ->
13772// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
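// For example, (add (shl x, 1), (shl y, 3)) has Diff = 2 and Bits = 1, so it
// becomes (shl (SHL_ADD y, 2, x), 1), i.e. sh2add followed by slli.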
13773 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
13774 const RISCVSubtarget &Subtarget) {
13775 // Perform this optimization only in the zba extension.
13776 if (!Subtarget.hasStdExtZba())
13777 return SDValue();
13778
13779 // Skip for vector types and larger types.
13780 EVT VT = N->getValueType(0);
13781 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13782 return SDValue();
13783
13784 // The two operand nodes must be SHL and have no other use.
13785 SDValue N0 = N->getOperand(0);
13786 SDValue N1 = N->getOperand(1);
13787 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13788 !N0->hasOneUse() || !N1->hasOneUse())
13789 return SDValue();
13790
13791 // Check c0 and c1.
13792 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13793 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13794 if (!N0C || !N1C)
13795 return SDValue();
13796 int64_t C0 = N0C->getSExtValue();
13797 int64_t C1 = N1C->getSExtValue();
13798 if (C0 <= 0 || C1 <= 0)
13799 return SDValue();
13800
13801 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13802 int64_t Bits = std::min(C0, C1);
13803 int64_t Diff = std::abs(C0 - C1);
13804 if (Diff != 1 && Diff != 2 && Diff != 3)
13805 return SDValue();
13806
13807 // Build nodes.
13808 SDLoc DL(N);
13809 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13810 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13811 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13812 DAG.getConstant(Diff, DL, VT), NS);
13813 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13814}
13815
13816// Combine a constant select operand into its use:
13817//
13818// (and (select cond, -1, c), x)
13819// -> (select cond, x, (and x, c)) [AllOnes=1]
13820// (or (select cond, 0, c), x)
13821// -> (select cond, x, (or x, c)) [AllOnes=0]
13822// (xor (select cond, 0, c), x)
13823// -> (select cond, x, (xor x, c)) [AllOnes=0]
13824// (add (select cond, 0, c), x)
13825// -> (select cond, x, (add x, c)) [AllOnes=0]
13826// (sub x, (select cond, 0, c))
13827// -> (select cond, x, (sub x, c)) [AllOnes=0]
13828 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13829 SelectionDAG &DAG, bool AllOnes,
13830 const RISCVSubtarget &Subtarget) {
13831 EVT VT = N->getValueType(0);
13832
13833 // Skip vectors.
13834 if (VT.isVector())
13835 return SDValue();
13836
13837 if (!Subtarget.hasConditionalMoveFusion()) {
13838 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13839 if ((!Subtarget.hasStdExtZicond() &&
13840 !Subtarget.hasVendorXVentanaCondOps()) ||
13841 N->getOpcode() != ISD::AND)
13842 return SDValue();
13843
13844 // Maybe harmful when the condition code has multiple uses.
13845 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13846 return SDValue();
13847
13848 // Maybe harmful when VT is wider than XLen.
13849 if (VT.getSizeInBits() > Subtarget.getXLen())
13850 return SDValue();
13851 }
13852
13853 if ((Slct.getOpcode() != ISD::SELECT &&
13854 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13855 !Slct.hasOneUse())
13856 return SDValue();
13857
13858 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13859 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13860 };
13861
13862 bool SwapSelectOps;
13863 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13864 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13865 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13866 SDValue NonConstantVal;
13867 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13868 SwapSelectOps = false;
13869 NonConstantVal = FalseVal;
13870 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13871 SwapSelectOps = true;
13872 NonConstantVal = TrueVal;
13873 } else
13874 return SDValue();
13875
13876 // Slct is now known to be the desired identity constant when CC is true.
13877 TrueVal = OtherOp;
13878 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13879 // Unless SwapSelectOps says the condition should be false.
13880 if (SwapSelectOps)
13881 std::swap(TrueVal, FalseVal);
13882
13883 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13884 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13885 {Slct.getOperand(0), Slct.getOperand(1),
13886 Slct.getOperand(2), TrueVal, FalseVal});
13887
13888 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13889 {Slct.getOperand(0), TrueVal, FalseVal});
13890}
13891
13892// Attempt combineSelectAndUse on each operand of a commutative operator N.
13893 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13894 bool AllOnes,
13895 const RISCVSubtarget &Subtarget) {
13896 SDValue N0 = N->getOperand(0);
13897 SDValue N1 = N->getOperand(1);
13898 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13899 return Result;
13900 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13901 return Result;
13902 return SDValue();
13903}
13904
13905// Transform (add (mul x, c0), c1) ->
13906// (add (mul (add x, c1/c0), c0), c1%c0).
13907// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13908// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13909// to an infinite loop in DAGCombine if transformed.
13910// Or transform (add (mul x, c0), c1) ->
13911// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13912// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13913// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13914// lead to an infinite loop in DAGCombine if transformed.
13915// Or transform (add (mul x, c0), c1) ->
13916// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13917// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13918// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13919// lead to an infinite loop in DAGCombine if transformed.
13920// Or transform (add (mul x, c0), c1) ->
13921// (mul (add x, c1/c0), c0).
13922// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
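// Worked example: (add (mul x, 100), 4100) has c1 = 4100, which is not simm12,
// but c1/c0 = 41 and c1%c0 = 0 are, and c0*(c1/c0) = 4100 is not, so it becomes
// (add (mul (add x, 41), 100), 0).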
13923 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13924 const RISCVSubtarget &Subtarget) {
13925 // Skip for vector types and larger types.
13926 EVT VT = N->getValueType(0);
13927 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13928 return SDValue();
13929 // The first operand node must be a MUL and has no other use.
13930 SDValue N0 = N->getOperand(0);
13931 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13932 return SDValue();
13933 // Check if c0 and c1 match above conditions.
13934 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13935 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13936 if (!N0C || !N1C)
13937 return SDValue();
13938 // If N0C has multiple uses it's possible one of the cases in
13939 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
13940 // in an infinite loop.
13941 if (!N0C->hasOneUse())
13942 return SDValue();
13943 int64_t C0 = N0C->getSExtValue();
13944 int64_t C1 = N1C->getSExtValue();
13945 int64_t CA, CB;
13946 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
13947 return SDValue();
13948 // Search for proper CA (non-zero) and CB that both are simm12.
13949 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
13950 !isInt<12>(C0 * (C1 / C0))) {
13951 CA = C1 / C0;
13952 CB = C1 % C0;
13953 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
13954 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
13955 CA = C1 / C0 + 1;
13956 CB = C1 % C0 - C0;
13957 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
13958 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
13959 CA = C1 / C0 - 1;
13960 CB = C1 % C0 + C0;
13961 } else
13962 return SDValue();
13963 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
13964 SDLoc DL(N);
13965 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
13966 DAG.getSignedConstant(CA, DL, VT));
13967 SDValue New1 =
13968 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
13969 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
13970}
13971
13972// add (zext, zext) -> zext (add (zext, zext))
13973// sub (zext, zext) -> sext (sub (zext, zext))
13974// mul (zext, zext) -> zext (mul (zext, zext))
13975// sdiv (zext, zext) -> zext (sdiv (zext, zext))
13976// udiv (zext, zext) -> zext (udiv (zext, zext))
13977// srem (zext, zext) -> zext (srem (zext, zext))
13978// urem (zext, zext) -> zext (urem (zext, zext))
13979//
13980// where the sum of the extend widths match, and the range of the bin op
13981// fits inside the width of the narrower bin op. (For profitability on rvv, we
13982// use a power of two for both inner and outer extend.)
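// For example, with vXi8 operands zero extended to vXi32:
//   add (zext vXi8 A to vXi32), (zext vXi8 B to vXi32)
//   -> zext (add (zext vXi8 A to vXi16), (zext vXi8 B to vXi16)) to vXi32
// because an i8 + i8 sum always fits in the narrower i16 element.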
13983static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
13984
13985 EVT VT = N->getValueType(0);
13986 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13987 return SDValue();
13988
13989 SDValue N0 = N->getOperand(0);
13990 SDValue N1 = N->getOperand(1);
13991  if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
13992 return SDValue();
13993 if (!N0.hasOneUse() || !N1.hasOneUse())
13994 return SDValue();
13995
13996 SDValue Src0 = N0.getOperand(0);
13997 SDValue Src1 = N1.getOperand(0);
13998 EVT SrcVT = Src0.getValueType();
13999 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
14000 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
14001 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
14002 return SDValue();
14003
14004 LLVMContext &C = *DAG.getContext();
14005  EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
14006 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
14007
14008 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
14009 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
14010
14011 // Src0 and Src1 are zero extended, so they're always positive if signed.
14012 //
14013 // sub can produce a negative from two positive operands, so it needs sign
14014 // extended. Other nodes produce a positive from two positive operands, so
14015 // zero extend instead.
14016 unsigned OuterExtend =
14017 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14018
14019 return DAG.getNode(
14020 OuterExtend, SDLoc(N), VT,
14021 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
14022}
14023
14024// Try to turn (add (xor bool, 1), -1) into (neg bool).
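// If bool is known to be 0 or 1, (xor bool, 1) computes (1 - bool), so adding
// -1 yields -bool, which is the negated boolean.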
14025static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
14026  SDValue N0 = N->getOperand(0);
14027 SDValue N1 = N->getOperand(1);
14028 EVT VT = N->getValueType(0);
14029 SDLoc DL(N);
14030
14031 // RHS should be -1.
14032 if (!isAllOnesConstant(N1))
14033 return SDValue();
14034
14035 // Look for (xor X, 1).
14036 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
14037 return SDValue();
14038
14039 // First xor input should be 0 or 1.
14040  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14041 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
14042 return SDValue();
14043
14044 // Emit a negate of the setcc.
14045 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
14046 N0.getOperand(0));
14047}
14048
14049static SDValue performADDCombine(SDNode *N,
14050                                 TargetLowering::DAGCombinerInfo &DCI,
14051 const RISCVSubtarget &Subtarget) {
14052 SelectionDAG &DAG = DCI.DAG;
14053 if (SDValue V = combineAddOfBooleanXor(N, DAG))
14054 return V;
14055 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
14056 return V;
14057 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14058 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
14059 return V;
14060 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14061 return V;
14062 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14063 return V;
14064 if (SDValue V = combineBinOpOfZExt(N, DAG))
14065 return V;
14066
14067 // fold (add (select lhs, rhs, cc, 0, y), x) ->
14068 // (select lhs, rhs, cc, x, (add x, y))
14069 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14070}
14071
14072// Try to turn a sub with a boolean RHS and a constant LHS into an addi.
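// For example, (sub 8, (seteq X, Y)) becomes (add (setne X, Y), 7); the setcc
// produces 0 or 1 and 7 is a valid ADDI immediate.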
14073static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
14074  SDValue N0 = N->getOperand(0);
14075 SDValue N1 = N->getOperand(1);
14076 EVT VT = N->getValueType(0);
14077 SDLoc DL(N);
14078
14079 // Require a constant LHS.
14080 auto *N0C = dyn_cast<ConstantSDNode>(N0);
14081 if (!N0C)
14082 return SDValue();
14083
14084 // All our optimizations involve subtracting 1 from the immediate and forming
14085 // an ADDI. Make sure the new immediate is valid for an ADDI.
14086 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
14087 if (!ImmValMinus1.isSignedIntN(12))
14088 return SDValue();
14089
14090 SDValue NewLHS;
14091 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
14092 // (sub constant, (setcc x, y, eq/neq)) ->
14093 // (add (setcc x, y, neq/eq), constant - 1)
14094 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14095 EVT SetCCOpVT = N1.getOperand(0).getValueType();
14096 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
14097 return SDValue();
14098 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
14099 NewLHS =
14100 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
14101 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
14102 N1.getOperand(0).getOpcode() == ISD::SETCC) {
14103 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
14104 // Since setcc returns a bool the xor is equivalent to 1-setcc.
14105 NewLHS = N1.getOperand(0);
14106 } else
14107 return SDValue();
14108
14109 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
14110 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
14111}
14112
14113// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
14114// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
14115// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
14116// valid with Y=3, while 0b0000_1000_0000_0100 is not.
14117static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
14118 const RISCVSubtarget &Subtarget) {
14119 if (!Subtarget.hasStdExtZbb())
14120 return SDValue();
14121
14122 EVT VT = N->getValueType(0);
14123
14124 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
14125 return SDValue();
14126
14127 SDValue N0 = N->getOperand(0);
14128 SDValue N1 = N->getOperand(1);
14129
14130 if (N0->getOpcode() != ISD::SHL)
14131 return SDValue();
14132
14133 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
14134 if (!ShAmtCLeft)
14135 return SDValue();
14136 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
14137
14138 if (ShiftedAmount >= 8)
14139 return SDValue();
14140
14141 SDValue LeftShiftOperand = N0->getOperand(0);
14142 SDValue RightShiftOperand = N1;
14143
14144 if (ShiftedAmount != 0) { // Right operand must be a right shift.
14145 if (N1->getOpcode() != ISD::SRL)
14146 return SDValue();
14147 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14148 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
14149 return SDValue();
14150 RightShiftOperand = N1.getOperand(0);
14151 }
14152
14153 // At least one shift should have a single use.
14154 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
14155 return SDValue();
14156
14157 if (LeftShiftOperand != RightShiftOperand)
14158 return SDValue();
14159
14160 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
14161 Mask <<= ShiftedAmount;
14162 // Check that X has indeed the right shape (only the Y-th bit can be set in
14163 // every byte).
14164 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
14165 return SDValue();
14166
14167 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
14168}
14169
14170static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
14171 const RISCVSubtarget &Subtarget) {
14172 if (SDValue V = combineSubOfBoolean(N, DAG))
14173 return V;
14174
14175 EVT VT = N->getValueType(0);
14176 SDValue N0 = N->getOperand(0);
14177 SDValue N1 = N->getOperand(1);
14178 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
14179 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
14180 isNullConstant(N1.getOperand(1))) {
14181 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14182 if (CCVal == ISD::SETLT) {
14183 SDLoc DL(N);
14184 unsigned ShAmt = N0.getValueSizeInBits() - 1;
14185 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
14186 DAG.getConstant(ShAmt, DL, VT));
14187 }
14188 }
14189
14190 if (SDValue V = combineBinOpOfZExt(N, DAG))
14191 return V;
14192 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
14193 return V;
14194
14195 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
14196 // (select lhs, rhs, cc, x, (sub x, y))
14197 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
14198}
14199
14200// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
14201// Legalizing setcc can introduce xors like this. Doing this transform reduces
14202// the number of xors and may allow the xor to fold into a branch condition.
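// For example, when A and B are known to be 0 or 1,
// (and (xor A, 1), (xor B, 1)) becomes (xor (or A, B), 1), saving one xor.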
14203static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
14204  SDValue N0 = N->getOperand(0);
14205 SDValue N1 = N->getOperand(1);
14206 bool IsAnd = N->getOpcode() == ISD::AND;
14207
14208 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
14209 return SDValue();
14210
14211 if (!N0.hasOneUse() || !N1.hasOneUse())
14212 return SDValue();
14213
14214 SDValue N01 = N0.getOperand(1);
14215 SDValue N11 = N1.getOperand(1);
14216
14217 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
14218 // (xor X, -1) based on the upper bits of the other operand being 0. If the
14219 // operation is And, allow one of the Xors to use -1.
14220 if (isOneConstant(N01)) {
14221 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
14222 return SDValue();
14223 } else if (isOneConstant(N11)) {
14224 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
14225 if (!(IsAnd && isAllOnesConstant(N01)))
14226 return SDValue();
14227 } else
14228 return SDValue();
14229
14230 EVT VT = N->getValueType(0);
14231
14232 SDValue N00 = N0.getOperand(0);
14233 SDValue N10 = N1.getOperand(0);
14234
14235 // The LHS of the xors needs to be 0/1.
14236  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14237 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
14238 return SDValue();
14239
14240 // Invert the opcode and insert a new xor.
14241 SDLoc DL(N);
14242 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
14243 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
14244 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
14245}
14246
14247// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
14248// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
14249// value to an unsigned value. This will be lowered to vmax and a series of
14250// vnclipu instructions later. This can be extended to truncated types other
14251// than i8 by replacing 256 and 255 with the equivalent constants for the
14252// type.
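// For example, for vXi16 -> vXi8: an element of 300 fails the unsigned
// compare, selects the all-ones sext(setgt) arm, and truncates to 255, while
// -5 selects 0; smin(smax(X, 0), 255) produces the same results.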
14253static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
14254  EVT VT = N->getValueType(0);
14255 SDValue N0 = N->getOperand(0);
14256 EVT SrcVT = N0.getValueType();
14257
14258 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14259 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
14260 return SDValue();
14261
14262 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
14263 return SDValue();
14264
14265 SDValue Cond = N0.getOperand(0);
14266 SDValue True = N0.getOperand(1);
14267 SDValue False = N0.getOperand(2);
14268
14269 if (Cond.getOpcode() != ISD::SETCC)
14270 return SDValue();
14271
14272 // FIXME: Support the version of this pattern with the select operands
14273 // swapped.
14274 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14275 if (CCVal != ISD::SETULT)
14276 return SDValue();
14277
14278 SDValue CondLHS = Cond.getOperand(0);
14279 SDValue CondRHS = Cond.getOperand(1);
14280
14281 if (CondLHS != True)
14282 return SDValue();
14283
14284 unsigned ScalarBits = VT.getScalarSizeInBits();
14285
14286 // FIXME: Support other constants.
14287 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
14288 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
14289 return SDValue();
14290
14291 if (False.getOpcode() != ISD::SIGN_EXTEND)
14292 return SDValue();
14293
14294 False = False.getOperand(0);
14295
14296 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
14297 return SDValue();
14298
14299 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
14300 if (!FalseRHSC || !FalseRHSC->isZero())
14301 return SDValue();
14302
14303 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
14304 if (CCVal2 != ISD::SETGT)
14305 return SDValue();
14306
14307 // Emit the signed to unsigned saturation pattern.
14308 SDLoc DL(N);
14309 SDValue Max =
14310 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
14311 SDValue Min =
14312 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
14313 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
14314 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
14315}
14316
14317static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
14318 const RISCVSubtarget &Subtarget) {
14319 SDValue N0 = N->getOperand(0);
14320 EVT VT = N->getValueType(0);
14321
14322 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
14323 // extending X. This is safe since we only need the LSB after the shift and
14324 // shift amounts larger than 31 would produce poison. If we wait until
14325 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14326 // to use a BEXT instruction.
14327 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14328 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
14329 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14330 SDLoc DL(N0);
14331 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14332 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14333 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14334 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
14335 }
14336
14337 return combineTruncSelectToSMaxUSat(N, DAG);
14338}
14339
14340// Combines two comparison operations and a logic operation into one selection
14341// operation (min, max) and logic operation. Returns the newly constructed node
14342// if the conditions for the optimization are satisfied.
14343static SDValue performANDCombine(SDNode *N,
14344                                 TargetLowering::DAGCombinerInfo &DCI,
14345 const RISCVSubtarget &Subtarget) {
14346 SelectionDAG &DAG = DCI.DAG;
14347
14348 SDValue N0 = N->getOperand(0);
14349 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
14350 // extending X. This is safe since we only need the LSB after the shift and
14351 // shift amounts larger than 31 would produce poison. If we wait until
14352 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14353 // to use a BEXT instruction.
14354 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14355 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
14356 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
14357 N0.hasOneUse()) {
14358 SDLoc DL(N);
14359 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14360 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14361 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14362 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
14363 DAG.getConstant(1, DL, MVT::i64));
14364 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14365 }
14366
14367 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14368 return V;
14369 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14370 return V;
14371
14372 if (DCI.isAfterLegalizeDAG())
14373 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14374 return V;
14375
14376 // fold (and (select lhs, rhs, cc, -1, y), x) ->
14377 // (select lhs, rhs, cc, x, (and x, y))
14378 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
14379}
14380
14381// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
14382// FIXME: Generalize to other binary operators with same operand.
14383static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
14384 SelectionDAG &DAG) {
14385 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
14386
14387 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
14388      N1.getOpcode() != RISCVISD::CZERO_NEZ ||
14389 !N0.hasOneUse() || !N1.hasOneUse())
14390 return SDValue();
14391
14392 // Should have the same condition.
14393 SDValue Cond = N0.getOperand(1);
14394 if (Cond != N1.getOperand(1))
14395 return SDValue();
14396
14397 SDValue TrueV = N0.getOperand(0);
14398 SDValue FalseV = N1.getOperand(0);
14399
14400 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
14401 TrueV.getOperand(1) != FalseV.getOperand(1) ||
14402 !isOneConstant(TrueV.getOperand(1)) ||
14403 !TrueV.hasOneUse() || !FalseV.hasOneUse())
14404 return SDValue();
14405
14406 EVT VT = N->getValueType(0);
14407 SDLoc DL(N);
14408
14409 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
14410 Cond);
14411 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
14412 Cond);
14413 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
14414 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
14415}
14416
14417static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
14418 const RISCVSubtarget &Subtarget) {
14419 SelectionDAG &DAG = DCI.DAG;
14420
14421 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14422 return V;
14423 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14424 return V;
14425
14426 if (DCI.isAfterLegalizeDAG())
14427 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14428 return V;
14429
14430  // Look for an OR of CZERO_EQZ/NEZ with the same condition (the select idiom).
14431  // We may be able to pull a common operation out of the true and false values.
14432 SDValue N0 = N->getOperand(0);
14433 SDValue N1 = N->getOperand(1);
14434 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
14435 return V;
14436 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
14437 return V;
14438
14439 // fold (or (select cond, 0, y), x) ->
14440 // (select cond, x, (or x, y))
14441 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14442}
14443
14444static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
14445 const RISCVSubtarget &Subtarget) {
14446 SDValue N0 = N->getOperand(0);
14447 SDValue N1 = N->getOperand(1);
14448
14449 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
14450  // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
14451  // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
14452 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14453 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
14454 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
14455 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14456 SDLoc DL(N);
14457 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14458 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14459 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
14460 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
14461 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14462 }
14463
14464 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
14465 // NOTE: Assumes ROL being legal means ROLW is legal.
14466 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14467 if (N0.getOpcode() == RISCVISD::SLLW &&
14468      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
14469 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
14470 SDLoc DL(N);
14471 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
14472 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
14473 }
14474
14475 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
14476 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
14477 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
14478 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14479 if (ConstN00 && CC == ISD::SETLT) {
14480 EVT VT = N0.getValueType();
14481 SDLoc DL(N0);
14482 const APInt &Imm = ConstN00->getAPIntValue();
14483 if ((Imm + 1).isSignedIntN(12))
14484 return DAG.getSetCC(DL, VT, N0.getOperand(1),
14485 DAG.getConstant(Imm + 1, DL, VT), CC);
14486 }
14487 }
14488
14489 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14490 return V;
14491 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14492 return V;
14493
14494 // fold (xor (select cond, 0, y), x) ->
14495 // (select cond, x, (xor x, y))
14496 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14497}
14498
14499// Try to expand a scalar multiply to a faster sequence.
14500static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
14501                         TargetLowering::DAGCombinerInfo &DCI,
14502 const RISCVSubtarget &Subtarget) {
14503
14504 EVT VT = N->getValueType(0);
14505
14506 // LI + MUL is usually smaller than the alternative sequence.
14507  if (DAG.getMachineFunction().getFunction().hasMinSize())
14508 return SDValue();
14509
14510 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14511 return SDValue();
14512
14513 if (VT != Subtarget.getXLenVT())
14514 return SDValue();
14515
14516 const bool HasShlAdd =
14517 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14518
14519 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
14520 if (!CNode)
14521 return SDValue();
14522 uint64_t MulAmt = CNode->getZExtValue();
14523
14524  // WARNING: The code below is knowingly incorrect with regard to undef semantics.
14525 // We're adding additional uses of X here, and in principle, we should be freezing
14526 // X before doing so. However, adding freeze here causes real regressions, and no
14527 // other target properly freezes X in these cases either.
14528 SDValue X = N->getOperand(0);
14529
14530 if (HasShlAdd) {
14531 for (uint64_t Divisor : {3, 5, 9}) {
14532 if (MulAmt % Divisor != 0)
14533 continue;
14534 uint64_t MulAmt2 = MulAmt / Divisor;
14535 // 3/5/9 * 2^N -> shl (shXadd X, X), N
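      // For example, MulAmt == 40 == 5 * 8 becomes (shl (sh2add X, X), 3).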
14536 if (isPowerOf2_64(MulAmt2)) {
14537 SDLoc DL(N);
14538 SDValue X = N->getOperand(0);
14539 // Put the shift first if we can fold a zext into the
14540 // shift forming a slli.uw.
14541 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
14542 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
14543 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
14544 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14545 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
14546 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
14547 Shl);
14548 }
14549        // Otherwise, put the shl second so that it can fold with the
14550        // following instructions (e.g. sext or add).
14551 SDValue Mul359 =
14552 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14553 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14554 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
14555 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14556 }
14557
14558 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
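      // For example, MulAmt == 45 == 5 * 9 becomes
      // (sh3add (sh2add X, X), (sh2add X, X)).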
14559 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
14560 SDLoc DL(N);
14561 SDValue Mul359 =
14562 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14563 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14564 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14565 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
14566 Mul359);
14567 }
14568 }
14569
14570    // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
14571    // shXadd. First check if this is a sum of two powers of 2 because that's
14572    // easy. Then count the trailing zeros below the first set bit.
14573 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
14574 unsigned ScaleShift = llvm::countr_zero(MulAmt);
14575 if (ScaleShift >= 1 && ScaleShift < 4) {
14576 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
14577 SDLoc DL(N);
14578 SDValue Shift1 =
14579 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14580 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14581 DAG.getConstant(ScaleShift, DL, VT), Shift1);
14582 }
14583 }
14584
14585 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
14586    // This is the two-instruction form; there are also three-instruction
14587    // variants we could implement, e.g.:
14588 // (2^(1,2,3) * 3,5,9 + 1) << C2
14589 // 2^(C1>3) * 3,5,9 +/- 1
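    // For example, MulAmt == 19 == 2 * 9 + 1 becomes (sh1add (sh3add X, X), X).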
14590 for (uint64_t Divisor : {3, 5, 9}) {
14591 uint64_t C = MulAmt - 1;
14592 if (C <= Divisor)
14593 continue;
14594 unsigned TZ = llvm::countr_zero(C);
14595 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14596 SDLoc DL(N);
14597 SDValue Mul359 =
14598 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14599 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14600 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14601 DAG.getConstant(TZ, DL, VT), X);
14602 }
14603 }
14604
14605 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
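    // For example, MulAmt == 37 == 32 + 4 + 1 becomes
    // (add (shl X, 5), (sh2add X, X)).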
14606 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14607 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
14608 if (ScaleShift >= 1 && ScaleShift < 4) {
14609 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14610 SDLoc DL(N);
14611 SDValue Shift1 =
14612 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14613 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
14614 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14615 DAG.getConstant(ScaleShift, DL, VT), X));
14616 }
14617 }
14618
14619    // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
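    // For example, MulAmt == 59 == 64 - 5 becomes
    // (sub (shl X, 6), (sh2add X, X)).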
14620 for (uint64_t Offset : {3, 5, 9}) {
14621 if (isPowerOf2_64(MulAmt + Offset)) {
14622 SDLoc DL(N);
14623 SDValue Shift1 =
14624 DAG.getNode(ISD::SHL, DL, VT, X,
14625 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
14626 SDValue Mul359 =
14627 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14628 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
14629 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
14630 }
14631 }
14632 }
14633
14634 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
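  // For example, MulAmt == 24 == 32 - 8 becomes (sub (shl X, 5), (shl X, 3)).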
14635 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14636 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
14637 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14638 SDLoc DL(N);
14639 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14640 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
14641 SDValue Shift2 =
14642 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14643 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
14644 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
14645 }
14646
14647 if (HasShlAdd) {
14648 for (uint64_t Divisor : {3, 5, 9}) {
14649 if (MulAmt % Divisor != 0)
14650 continue;
14651 uint64_t MulAmt2 = MulAmt / Divisor;
14652 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
14653 // of 25 which happen to be quite common.
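      // For example, MulAmt == 200 == 5 * 5 * 8 becomes
      // (shl (sh2add (sh2add X, X), (sh2add X, X)), 3).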
14654 for (uint64_t Divisor2 : {3, 5, 9}) {
14655 if (MulAmt2 % Divisor2 != 0)
14656 continue;
14657 uint64_t MulAmt3 = MulAmt2 / Divisor2;
14658 if (isPowerOf2_64(MulAmt3)) {
14659 SDLoc DL(N);
14660 SDValue Mul359A =
14661 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14662 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14663 SDValue Mul359B = DAG.getNode(
14664 RISCVISD::SHL_ADD, DL, VT, Mul359A,
14665 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
14666 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
14667 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
14668 }
14669 }
14670 }
14671 }
14672
14673 return SDValue();
14674}
14675
14676// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
14677// (bitcast (sra (v2Xi16 (bitcast X)), 15))
14678// Same for other equivalent types with other equivalent constants.
14679static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
14680 EVT VT = N->getValueType(0);
14681 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14682
14683 // Do this for legal vectors unless they are i1 or i8 vectors.
14684 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
14685 return SDValue();
14686
14687 if (N->getOperand(0).getOpcode() != ISD::AND ||
14688 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
14689 return SDValue();
14690
14691 SDValue And = N->getOperand(0);
14692 SDValue Srl = And.getOperand(0);
14693
14694 APInt V1, V2, V3;
14695 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
14696 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
14697      !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
14698 return SDValue();
14699
14700 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
14701 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14702 V3 != (HalfSize - 1))
14703 return SDValue();
14704
14705 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
14706 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
14707 VT.getVectorElementCount() * 2);
14708 SDLoc DL(N);
14709 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
14710 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
14711 DAG.getConstant(HalfSize - 1, DL, HalfVT));
14712 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
14713}
14714
14715static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
14716                                 TargetLowering::DAGCombinerInfo &DCI,
14717 const RISCVSubtarget &Subtarget) {
14718 EVT VT = N->getValueType(0);
14719 if (!VT.isVector())
14720 return expandMul(N, DAG, DCI, Subtarget);
14721
14722 SDLoc DL(N);
14723 SDValue N0 = N->getOperand(0);
14724 SDValue N1 = N->getOperand(1);
14725 SDValue MulOper;
14726 unsigned AddSubOpc;
14727
14728 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
14729 // (mul x, add (y, 1)) -> (add x, (mul x, y))
14730 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
14731 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
14732 auto IsAddSubWith1 = [&](SDValue V) -> bool {
14733 AddSubOpc = V->getOpcode();
14734 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
14735 SDValue Opnd = V->getOperand(1);
14736 MulOper = V->getOperand(0);
14737 if (AddSubOpc == ISD::SUB)
14738 std::swap(Opnd, MulOper);
14739 if (isOneOrOneSplat(Opnd))
14740 return true;
14741 }
14742 return false;
14743 };
14744
14745 if (IsAddSubWith1(N0)) {
14746 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
14747 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
14748 }
14749
14750 if (IsAddSubWith1(N1)) {
14751 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
14752 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
14753 }
14754
14755 if (SDValue V = combineBinOpOfZExt(N, DAG))
14756 return V;
14757
14758  if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
14759 return V;
14760
14761 return SDValue();
14762}
14763
14764/// According to the property that indexed load/store instructions zero-extend
14765/// their indices, try to narrow the type of the index operand.
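/// For example, an index computed as (shl (zext vXi8 X to vXi64), splat 2) only
/// needs 10 bits, so it can be rebuilt as (shl (zext vXi8 X to vXi16), splat 2).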
14766static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
14767 if (isIndexTypeSigned(IndexType))
14768 return false;
14769
14770 if (!N->hasOneUse())
14771 return false;
14772
14773 EVT VT = N.getValueType();
14774 SDLoc DL(N);
14775
14776 // In general, what we're doing here is seeing if we can sink a truncate to
14777 // a smaller element type into the expression tree building our index.
14778 // TODO: We can generalize this and handle a bunch more cases if useful.
14779
14780 // Narrow a buildvector to the narrowest element type. This requires less
14781 // work and less register pressure at high LMUL, and creates smaller constants
14782 // which may be cheaper to materialize.
14783 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14784 KnownBits Known = DAG.computeKnownBits(N);
14785 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14786 LLVMContext &C = *DAG.getContext();
14787 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14788 if (ResultVT.bitsLT(VT.getVectorElementType())) {
14789 N = DAG.getNode(ISD::TRUNCATE, DL,
14790 VT.changeVectorElementType(ResultVT), N);
14791 return true;
14792 }
14793 }
14794
14795  // Handle the pattern (shl (zext x to ty), C) where bits(x) + C < bits(ty).
14796 if (N.getOpcode() != ISD::SHL)
14797 return false;
14798
14799 SDValue N0 = N.getOperand(0);
14800 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14801      N0.getOpcode() != RISCVISD::VZEXT_VL)
14802 return false;
14803 if (!N0->hasOneUse())
14804 return false;
14805
14806 APInt ShAmt;
14807 SDValue N1 = N.getOperand(1);
14808 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14809 return false;
14810
14811 SDValue Src = N0.getOperand(0);
14812 EVT SrcVT = Src.getValueType();
14813 unsigned SrcElen = SrcVT.getScalarSizeInBits();
14814 unsigned ShAmtV = ShAmt.getZExtValue();
14815 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14816 NewElen = std::max(NewElen, 8U);
14817
14818 // Skip if NewElen is not narrower than the original extended type.
14819 if (NewElen >= N0.getValueType().getScalarSizeInBits())
14820 return false;
14821
14822 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14823 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14824
14825 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14826 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14827 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14828 return true;
14829}
14830
14831// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14832// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14833// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14834// can become a sext.w instead of a shift pair.
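// For example, (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000).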
14835static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
14836                                   const RISCVSubtarget &Subtarget) {
14837 SDValue N0 = N->getOperand(0);
14838 SDValue N1 = N->getOperand(1);
14839 EVT VT = N->getValueType(0);
14840 EVT OpVT = N0.getValueType();
14841
14842 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14843 return SDValue();
14844
14845 // RHS needs to be a constant.
14846 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14847 if (!N1C)
14848 return SDValue();
14849
14850 // LHS needs to be (and X, 0xffffffff).
14851 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14852 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14853 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14854 return SDValue();
14855
14856 // Looking for an equality compare.
14857 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14858 if (!isIntEqualitySetCC(Cond))
14859 return SDValue();
14860
14861  // Don't do this if the sign bit is provably zero; it will be turned back into
14862 // an AND.
14863 APInt SignMask = APInt::getOneBitSet(64, 31);
14864 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14865 return SDValue();
14866
14867 const APInt &C1 = N1C->getAPIntValue();
14868
14869 SDLoc dl(N);
14870 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14871 // to be equal.
14872 if (C1.getActiveBits() > 32)
14873 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14874
14875 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14876 N0.getOperand(0), DAG.getValueType(MVT::i32));
14877 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14878 dl, OpVT), Cond);
14879}
14880
14881static SDValue
14882performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
14883 const RISCVSubtarget &Subtarget) {
14884 SDValue Src = N->getOperand(0);
14885 EVT VT = N->getValueType(0);
14886 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14887 unsigned Opc = Src.getOpcode();
14888
14889 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14890 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
14891 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
14892 Subtarget.hasStdExtZfhmin())
14893 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14894 Src.getOperand(0));
14895
14896 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
14897 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
14898 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14899 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
14900 return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
14901 Src.getOperand(1));
14902
14903 return SDValue();
14904}
14905
14906namespace {
14907// Forward declaration of the structure holding the necessary information to
14908// apply a combine.
14909struct CombineResult;
14910
14911enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14912/// Helper class for folding sign/zero extensions.
14913/// In particular, this class is used for the following combines:
14914/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14915/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14916/// mul | mul_vl -> vwmul(u) | vwmul_su
14917/// shl | shl_vl -> vwsll
14918/// fadd -> vfwadd | vfwadd_w
14919/// fsub -> vfwsub | vfwsub_w
14920/// fmul -> vfwmul
14921/// An object of this class represents an operand of the operation we want to
14922/// combine.
14923/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14924/// NodeExtensionHelper for `a` and one for `b`.
14925///
14926/// This class abstracts away how the extension is materialized and
14927/// how its number of users affect the combines.
14928///
14929/// In particular:
14930/// - VWADD_W is conceptually == add(op0, sext(op1))
14931/// - VWADDU_W == add(op0, zext(op1))
14932/// - VWSUB_W == sub(op0, sext(op1))
14933/// - VWSUBU_W == sub(op0, zext(op1))
14934/// - VFWADD_W == fadd(op0, fpext(op1))
14935/// - VFWSUB_W == fsub(op0, fpext(op1))
14936/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
14937/// zext|sext(smaller_value).
14938struct NodeExtensionHelper {
14939 /// Records if this operand is like being zero extended.
14940 bool SupportsZExt;
14941 /// Records if this operand is like being sign extended.
14942 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
14943 /// instance, a splat constant (e.g., 3), would support being both sign and
14944 /// zero extended.
14945 bool SupportsSExt;
14946  /// Records if this operand is like being floating-point extended.
14947 bool SupportsFPExt;
14948 /// This boolean captures whether we care if this operand would still be
14949 /// around after the folding happens.
14950 bool EnforceOneUse;
14951 /// Original value that this NodeExtensionHelper represents.
14952 SDValue OrigOperand;
14953
14954 /// Get the value feeding the extension or the value itself.
14955 /// E.g., for zext(a), this would return a.
14956 SDValue getSource() const {
14957 switch (OrigOperand.getOpcode()) {
14958 case ISD::ZERO_EXTEND:
14959 case ISD::SIGN_EXTEND:
14960 case RISCVISD::VSEXT_VL:
14961 case RISCVISD::VZEXT_VL:
14962    case RISCVISD::FP_EXTEND_VL:
14963 return OrigOperand.getOperand(0);
14964 default:
14965 return OrigOperand;
14966 }
14967 }
14968
14969 /// Check if this instance represents a splat.
14970 bool isSplat() const {
14971 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
14972 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
14973 }
14974
14975 /// Get the extended opcode.
14976 unsigned getExtOpc(ExtKind SupportsExt) const {
14977 switch (SupportsExt) {
14978 case ExtKind::SExt:
14979 return RISCVISD::VSEXT_VL;
14980 case ExtKind::ZExt:
14981 return RISCVISD::VZEXT_VL;
14982 case ExtKind::FPExt:
14983      return RISCVISD::FP_EXTEND_VL;
14984 }
14985 llvm_unreachable("Unknown ExtKind enum");
14986 }
14987
14988 /// Get or create a value that can feed \p Root with the given extension \p
14989  /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
14990 /// operand. \see ::getSource().
14991 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
14992 const RISCVSubtarget &Subtarget,
14993 std::optional<ExtKind> SupportsExt) const {
14994 if (!SupportsExt.has_value())
14995 return OrigOperand;
14996
14997 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
14998
14999 SDValue Source = getSource();
15000 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
15001 if (Source.getValueType() == NarrowVT)
15002 return Source;
15003
15004 // vfmadd_vl -> vfwmadd_vl can take bf16 operands
15005 if (Source.getValueType().getVectorElementType() == MVT::bf16) {
15006 assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
15007 Root->getOpcode() == RISCVISD::VFMADD_VL);
15008 return Source;
15009 }
15010
15011 unsigned ExtOpc = getExtOpc(*SupportsExt);
15012
15013 // If we need an extension, we should be changing the type.
15014 SDLoc DL(OrigOperand);
15015 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
15016 switch (OrigOperand.getOpcode()) {
15017 case ISD::ZERO_EXTEND:
15018 case ISD::SIGN_EXTEND:
15019 case RISCVISD::VSEXT_VL:
15020 case RISCVISD::VZEXT_VL:
15021    case RISCVISD::FP_EXTEND_VL:
15022 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
15023 case ISD::SPLAT_VECTOR:
15024 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
15025    case RISCVISD::VMV_V_X_VL:
15026 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
15027 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
15028    case RISCVISD::VFMV_V_F_VL:
15029 Source = Source.getOperand(1);
15030 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
15031 Source = Source.getOperand(0);
15032 assert(Source.getValueType() == NarrowVT.getVectorElementType());
15033 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
15034 DAG.getUNDEF(NarrowVT), Source, VL);
15035 default:
15036 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
15037 // and that operand should already have the right NarrowVT so no
15038 // extension should be required at this point.
15039 llvm_unreachable("Unsupported opcode");
15040 }
15041 }
15042
15043 /// Helper function to get the narrow type for \p Root.
15044 /// The narrow type is the type of \p Root where we divided the size of each
15045  /// element by 2. E.g., if Root's type is <2xi16>, the narrow type is <2xi8>.
15046 /// \pre Both the narrow type and the original type should be legal.
15047 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
15048 MVT VT = Root->getSimpleValueType(0);
15049
15050 // Determine the narrow size.
15051 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15052
15053 MVT EltVT = SupportsExt == ExtKind::FPExt
15054 ? MVT::getFloatingPointVT(NarrowSize)
15055 : MVT::getIntegerVT(NarrowSize);
15056
15057 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
15058 "Trying to extend something we can't represent");
15059 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
15060 return NarrowVT;
15061 }
15062
15063 /// Get the opcode to materialize:
15064 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
15065 static unsigned getSExtOpcode(unsigned Opcode) {
15066 switch (Opcode) {
15067 case ISD::ADD:
15068 case RISCVISD::ADD_VL:
15071 case ISD::OR:
15072 return RISCVISD::VWADD_VL;
15073 case ISD::SUB:
15074 case RISCVISD::SUB_VL:
15077 return RISCVISD::VWSUB_VL;
15078 case ISD::MUL:
15079 case RISCVISD::MUL_VL:
15080 return RISCVISD::VWMUL_VL;
15081 default:
15082 llvm_unreachable("Unexpected opcode");
15083 }
15084 }
15085
15086 /// Get the opcode to materialize:
15087 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
15088 static unsigned getZExtOpcode(unsigned Opcode) {
15089 switch (Opcode) {
15090 case ISD::ADD:
15091 case RISCVISD::ADD_VL:
15094 case ISD::OR:
15095 return RISCVISD::VWADDU_VL;
15096 case ISD::SUB:
15097 case RISCVISD::SUB_VL:
15100 return RISCVISD::VWSUBU_VL;
15101 case ISD::MUL:
15102 case RISCVISD::MUL_VL:
15103 return RISCVISD::VWMULU_VL;
15104 case ISD::SHL:
15105 case RISCVISD::SHL_VL:
15106 return RISCVISD::VWSLL_VL;
15107 default:
15108 llvm_unreachable("Unexpected opcode");
15109 }
15110 }
15111
15112 /// Get the opcode to materialize:
15113 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
15114 static unsigned getFPExtOpcode(unsigned Opcode) {
15115 switch (Opcode) {
15116 case RISCVISD::FADD_VL:
15118 return RISCVISD::VFWADD_VL;
15119 case RISCVISD::FSUB_VL:
15121 return RISCVISD::VFWSUB_VL;
15122 case RISCVISD::FMUL_VL:
15123 return RISCVISD::VFWMUL_VL;
15125 return RISCVISD::VFWMADD_VL;
15127 return RISCVISD::VFWMSUB_VL;
15129 return RISCVISD::VFWNMADD_VL;
15131 return RISCVISD::VFWNMSUB_VL;
15132 default:
15133 llvm_unreachable("Unexpected opcode");
15134 }
15135 }
15136
15137 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
15138 /// newOpcode(a, b).
15139 static unsigned getSUOpcode(unsigned Opcode) {
15140 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
15141 "SU is only supported for MUL");
15142 return RISCVISD::VWMULSU_VL;
15143 }
15144
15145 /// Get the opcode to materialize
15146 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
15147 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
15148 switch (Opcode) {
15149 case ISD::ADD:
15150 case RISCVISD::ADD_VL:
15151 case ISD::OR:
15152 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
15154 case ISD::SUB:
15155 case RISCVISD::SUB_VL:
15156 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
15158 case RISCVISD::FADD_VL:
15159 return RISCVISD::VFWADD_W_VL;
15160 case RISCVISD::FSUB_VL:
15161 return RISCVISD::VFWSUB_W_VL;
15162 default:
15163 llvm_unreachable("Unexpected opcode");
15164 }
15165 }
15166
15167 using CombineToTry = std::function<std::optional<CombineResult>(
15168 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
15169 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
15170 const RISCVSubtarget &)>;
15171
15172 /// Check if this node needs to be fully folded or extended for all users.
15173 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
15174
15175 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
15176 const RISCVSubtarget &Subtarget) {
15177 unsigned Opc = OrigOperand.getOpcode();
15178 MVT VT = OrigOperand.getSimpleValueType();
15179
15180 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
15181 "Unexpected Opcode");
15182
15183    // The passthru must be undef for tail agnostic.
15184 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
15185 return;
15186
15187 // Get the scalar value.
15188 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
15189 : OrigOperand.getOperand(1);
15190
15191 // See if we have enough sign bits or zero bits in the scalar to use a
15192 // widening opcode by splatting to smaller element size.
15193 unsigned EltBits = VT.getScalarSizeInBits();
15194 unsigned ScalarBits = Op.getValueSizeInBits();
15195 // If we're not getting all bits from the element, we need special handling.
15196 if (ScalarBits < EltBits) {
15197 // This should only occur on RV32.
15198 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
15199 !Subtarget.is64Bit() && "Unexpected splat");
15200 // vmv.v.x sign extends narrow inputs.
15201 SupportsSExt = true;
15202
15203 // If the input is positive, then sign extend is also zero extend.
15204 if (DAG.SignBitIsZero(Op))
15205 SupportsZExt = true;
15206
15207 EnforceOneUse = false;
15208 return;
15209 }
15210
15211 unsigned NarrowSize = EltBits / 2;
15212 // If the narrow type cannot be expressed with a legal VMV,
15213 // this is not a valid candidate.
15214 if (NarrowSize < 8)
15215 return;
15216
15217 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
15218 SupportsSExt = true;
15219
15220 if (DAG.MaskedValueIsZero(Op,
15221 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
15222 SupportsZExt = true;
15223
15224 EnforceOneUse = false;
15225 }
15226
15227 bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
15228 const RISCVSubtarget &Subtarget) {
15229    // Any f16 extension will need zvfh.
15230 if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
15231 return false;
15232 // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
15233 // zvfbfwma
15234 if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
15235 Root->getOpcode() != RISCVISD::VFMADD_VL))
15236 return false;
15237 return true;
15238 }
15239
15240 /// Helper method to set the various fields of this struct based on the
15241 /// type of \p Root.
15242 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
15243 const RISCVSubtarget &Subtarget) {
15244 SupportsZExt = false;
15245 SupportsSExt = false;
15246 SupportsFPExt = false;
15247 EnforceOneUse = true;
15248 unsigned Opc = OrigOperand.getOpcode();
15249 // For the nodes we handle below, we end up using their inputs directly: see
15250 // getSource(). However since they either don't have a passthru or we check
15251 // that their passthru is undef, we can safely ignore their mask and VL.
15252 switch (Opc) {
15253 case ISD::ZERO_EXTEND:
15254 case ISD::SIGN_EXTEND: {
15255 MVT VT = OrigOperand.getSimpleValueType();
15256 if (!VT.isVector())
15257 break;
15258
15259 SDValue NarrowElt = OrigOperand.getOperand(0);
15260 MVT NarrowVT = NarrowElt.getSimpleValueType();
15261 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
15262 if (NarrowVT.getVectorElementType() == MVT::i1)
15263 break;
15264
15265 SupportsZExt = Opc == ISD::ZERO_EXTEND;
15266 SupportsSExt = Opc == ISD::SIGN_EXTEND;
15267 break;
15268 }
15269 case RISCVISD::VZEXT_VL:
15270 SupportsZExt = true;
15271 break;
15272 case RISCVISD::VSEXT_VL:
15273 SupportsSExt = true;
15274 break;
15276 MVT NarrowEltVT =
15278 if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15279 break;
15280 SupportsFPExt = true;
15281 break;
15282 }
15283 case ISD::SPLAT_VECTOR:
15285 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15286 break;
15287 case RISCVISD::VFMV_V_F_VL: {
15288 MVT VT = OrigOperand.getSimpleValueType();
15289
15290 if (!OrigOperand.getOperand(0).isUndef())
15291 break;
15292
15293 SDValue Op = OrigOperand.getOperand(1);
15294 if (Op.getOpcode() != ISD::FP_EXTEND)
15295 break;
15296
15297 if (!isSupportedFPExtend(Root, Op.getOperand(0).getSimpleValueType(),
15298 Subtarget))
15299 break;
15300
15301 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15302 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
15303 if (NarrowSize != ScalarBits)
15304 break;
15305
15306 SupportsFPExt = true;
15307 break;
15308 }
15309 default:
15310 break;
15311 }
15312 }
15313
15314 /// Check if \p Root supports any extension folding combines.
15315 static bool isSupportedRoot(const SDNode *Root,
15316 const RISCVSubtarget &Subtarget) {
15317 switch (Root->getOpcode()) {
15318 case ISD::ADD:
15319 case ISD::SUB:
15320 case ISD::MUL: {
15321 return Root->getValueType(0).isScalableVector();
15322 }
15323 case ISD::OR: {
15324 return Root->getValueType(0).isScalableVector() &&
15325 Root->getFlags().hasDisjoint();
15326 }
15327 // Vector Widening Integer Add/Sub/Mul Instructions
15328 case RISCVISD::ADD_VL:
15329 case RISCVISD::MUL_VL:
15332 case RISCVISD::SUB_VL:
15335 // Vector Widening Floating-Point Add/Sub/Mul Instructions
15336 case RISCVISD::FADD_VL:
15337 case RISCVISD::FSUB_VL:
15338 case RISCVISD::FMUL_VL:
15341 return true;
15342 case ISD::SHL:
15343 return Root->getValueType(0).isScalableVector() &&
15344 Subtarget.hasStdExtZvbb();
15345 case RISCVISD::SHL_VL:
15346 return Subtarget.hasStdExtZvbb();
15351 return true;
15352 default:
15353 return false;
15354 }
15355 }
15356
15357 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
15358 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
15359 const RISCVSubtarget &Subtarget) {
15360 assert(isSupportedRoot(Root, Subtarget) &&
15361           "Trying to build a helper with an "
15362 "unsupported root");
15363 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
15365 OrigOperand = Root->getOperand(OperandIdx);
15366
15367 unsigned Opc = Root->getOpcode();
15368 switch (Opc) {
15369 // We consider
15370 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
15371 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
15372 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
15379 if (OperandIdx == 1) {
15380 SupportsZExt =
15382 SupportsSExt =
15384 SupportsFPExt =
15386 // There's no existing extension here, so we don't have to worry about
15387 // making sure it gets removed.
15388 EnforceOneUse = false;
15389 break;
15390 }
15391 [[fallthrough]];
15392 default:
15393 fillUpExtensionSupport(Root, DAG, Subtarget);
15394 break;
15395 }
15396 }
15397
15398 /// Helper function to get the Mask and VL from \p Root.
15399 static std::pair<SDValue, SDValue>
15400 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
15401 const RISCVSubtarget &Subtarget) {
15402 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
15403 switch (Root->getOpcode()) {
15404 case ISD::ADD:
15405 case ISD::SUB:
15406 case ISD::MUL:
15407 case ISD::OR:
15408 case ISD::SHL: {
15409 SDLoc DL(Root);
15410 MVT VT = Root->getSimpleValueType(0);
15411 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
15412 }
15413 default:
15414 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
15415 }
15416 }
15417
15418 /// Helper function to check if \p N is commutative with respect to the
15419 /// foldings that are supported by this class.
15420 static bool isCommutative(const SDNode *N) {
15421 switch (N->getOpcode()) {
15422 case ISD::ADD:
15423 case ISD::MUL:
15424 case ISD::OR:
15425 case RISCVISD::ADD_VL:
15426 case RISCVISD::MUL_VL:
15429 case RISCVISD::FADD_VL:
15430 case RISCVISD::FMUL_VL:
15436 return true;
15437 case ISD::SUB:
15438 case RISCVISD::SUB_VL:
15441 case RISCVISD::FSUB_VL:
15443 case ISD::SHL:
15444 case RISCVISD::SHL_VL:
15445 return false;
15446 default:
15447 llvm_unreachable("Unexpected opcode");
15448 }
15449 }
15450
15451 /// Get a list of combine to try for folding extensions in \p Root.
15452 /// Note that each returned CombineToTry function doesn't actually modify
15453/// anything. Instead, they produce an optional CombineResult that, if not
15454/// std::nullopt, needs to be materialized for the combine to be applied.
15455 /// \see CombineResult::materialize.
15456 /// If the related CombineToTry function returns std::nullopt, that means the
15457 /// combine didn't match.
15458 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
15459};
15460
15461/// Helper structure that holds all the necessary information to materialize a
15462/// combine that does some extension folding.
15463struct CombineResult {
15464 /// Opcode to be generated when materializing the combine.
15465 unsigned TargetOpcode;
15466 // No value means no extension is needed.
15467 std::optional<ExtKind> LHSExt;
15468 std::optional<ExtKind> RHSExt;
15469 /// Root of the combine.
15470 SDNode *Root;
15471 /// LHS of the TargetOpcode.
15472 NodeExtensionHelper LHS;
15473 /// RHS of the TargetOpcode.
15474 NodeExtensionHelper RHS;
15475
15476 CombineResult(unsigned TargetOpcode, SDNode *Root,
15477 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15478 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15479 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15480 LHS(LHS), RHS(RHS) {}
15481
15482 /// Return a value that uses TargetOpcode and that can be used to replace
15483 /// Root.
15484 /// The actual replacement is *not* done in that method.
15485 SDValue materialize(SelectionDAG &DAG,
15486 const RISCVSubtarget &Subtarget) const {
15487 SDValue Mask, VL, Passthru;
15488 std::tie(Mask, VL) =
15489 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15490 switch (Root->getOpcode()) {
15491 default:
15492 Passthru = Root->getOperand(2);
15493 break;
15494 case ISD::ADD:
15495 case ISD::SUB:
15496 case ISD::MUL:
15497 case ISD::OR:
15498 case ISD::SHL:
15499 Passthru = DAG.getUNDEF(Root->getValueType(0));
15500 break;
15501 }
15502 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
15503 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15504 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15505 Passthru, Mask, VL);
15506 }
15507};
15508
15509/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15510/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15511/// are zext) and LHS and RHS can be folded into Root.
15512/// \p AllowExtMask defines which form `ext` can take in this pattern.
15513///
15514/// \note If the pattern can match with both zext and sext, the returned
15515/// CombineResult will feature the zext result.
15516///
15517/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15518/// can be used to apply the pattern.
15519static std::optional<CombineResult>
15520canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
15521 const NodeExtensionHelper &RHS,
15522 uint8_t AllowExtMask, SelectionDAG &DAG,
15523 const RISCVSubtarget &Subtarget) {
15524 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
15525 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15526 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
15527 /*RHSExt=*/{ExtKind::ZExt});
15528 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
15529 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15530 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15531 /*RHSExt=*/{ExtKind::SExt});
15532 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
15533 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15534 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
15535 /*RHSExt=*/{ExtKind::FPExt});
15536 return std::nullopt;
15537}
15538
15539/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15540/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15541/// are zext) and LHS and RHS can be folded into Root.
15542///
15543/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15544/// can be used to apply the pattern.
15545static std::optional<CombineResult>
15546canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
15547 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15548 const RISCVSubtarget &Subtarget) {
15549 return canFoldToVWWithSameExtensionImpl(
15550 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15551 Subtarget);
15552}
15553
15554/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15555///
15556/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15557/// can be used to apply the pattern.
15558static std::optional<CombineResult>
15559canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
15560 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15561 const RISCVSubtarget &Subtarget) {
15562 if (RHS.SupportsFPExt)
15563 return CombineResult(
15564 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15565 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
15566
15567 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15568 // sext/zext?
15569 // Control this behavior behind an option (AllowSplatInVW_W) for testing
15570 // purposes.
15571 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
15572 return CombineResult(
15573 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15574 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
15575 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
15576 return CombineResult(
15577 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15578 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
15579 return std::nullopt;
15580}
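// Illustrative example: for Root = RISCVISD::ADD_VL x, (zext nxv4i16 y to
// nxv4i32), where only the RHS is extended, this produces
// RISCVISD::VWADDU_W_VL x, y (vwaddu.wv) and leaves the already-wide LHS
// untouched.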
15581
15582/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15583///
15584/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15585/// can be used to apply the pattern.
15586static std::optional<CombineResult>
15587canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15588 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15589 const RISCVSubtarget &Subtarget) {
15590 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15591 Subtarget);
15592}
15593
15594/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15595///
15596/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15597/// can be used to apply the pattern.
15598static std::optional<CombineResult>
15599canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15600 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15601 const RISCVSubtarget &Subtarget) {
15602 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15603 Subtarget);
15604}
15605
15606/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
15607///
15608/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15609/// can be used to apply the pattern.
15610static std::optional<CombineResult>
15611canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15612 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15613 const RISCVSubtarget &Subtarget) {
15614 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15615 Subtarget);
15616}
15617
15618/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
15619///
15620/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15621/// can be used to apply the pattern.
15622static std::optional<CombineResult>
15623canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
15624 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15625 const RISCVSubtarget &Subtarget) {
15626
15627 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
15628 return std::nullopt;
15629 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
15630 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15631 /*RHSExt=*/{ExtKind::ZExt});
15632}
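// Illustrative example: RISCVISD::MUL_VL (sext a), (zext b) becomes
// RISCVISD::VWMULSU_VL a, b (vwmulsu.vv). The signed operand has to be on
// the LHS here; the commutative retry in combineOp_VLToVWOp_VL covers the
// swapped form.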
15633
15634SmallVector<NodeExtensionHelper::CombineToTry>
15635NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
15636 SmallVector<CombineToTry> Strategies;
15637 switch (Root->getOpcode()) {
15638 case ISD::ADD:
15639 case ISD::SUB:
15640 case ISD::OR:
15641 case RISCVISD::ADD_VL:
15642 case RISCVISD::SUB_VL:
15643 case RISCVISD::FADD_VL:
15644 case RISCVISD::FSUB_VL:
15645 // add|sub|fadd|fsub -> vwadd(u)|vwsub(u)|vfwadd|vfwsub
15646 Strategies.push_back(canFoldToVWWithSameExtension);
15647 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
15648 Strategies.push_back(canFoldToVW_W);
15649 break;
15650 case RISCVISD::FMUL_VL:
15651 case RISCVISD::VFMADD_VL:
15652 case RISCVISD::VFMSUB_VL:
15653 case RISCVISD::VFNMADD_VL:
15654 case RISCVISD::VFNMSUB_VL:
15655 Strategies.push_back(canFoldToVWWithSameExtension);
15656 break;
15657 case ISD::MUL:
15658 case RISCVISD::MUL_VL:
15659 // mul -> vwmul(u)
15660 Strategies.push_back(canFoldToVWWithSameExtension);
15661 // mul -> vwmulsu
15662 Strategies.push_back(canFoldToVW_SU);
15663 break;
15664 case ISD::SHL:
15665 case RISCVISD::SHL_VL:
15666 // shl -> vwsll
15667 Strategies.push_back(canFoldToVWWithZEXT);
15668 break;
15669 case RISCVISD::VWADD_W_VL:
15670 case RISCVISD::VWSUB_W_VL:
15671 // vwadd_w|vwsub_w -> vwadd|vwsub
15672 Strategies.push_back(canFoldToVWWithSEXT);
15673 break;
15674 case RISCVISD::VWADDU_W_VL:
15675 case RISCVISD::VWSUBU_W_VL:
15676 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
15677 Strategies.push_back(canFoldToVWWithZEXT);
15678 break;
15679 case RISCVISD::VFWADD_W_VL:
15680 case RISCVISD::VFWSUB_W_VL:
15681 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
15682 Strategies.push_back(canFoldToVWWithFPEXT);
15683 break;
15684 default:
15685 llvm_unreachable("Unexpected opcode");
15686 }
15687 return Strategies;
15688}
15689} // End anonymous namespace.
15690
15691/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
15692/// The supported combines are:
15693/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
15694/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
15695/// mul | mul_vl -> vwmul(u) | vwmul_su
15696/// shl | shl_vl -> vwsll
15697/// fadd_vl -> vfwadd | vfwadd_w
15698/// fsub_vl -> vfwsub | vfwsub_w
15699/// fmul_vl -> vfwmul
15700/// vwadd_w(u) -> vwadd(u)
15701/// vwsub_w(u) -> vwsub(u)
15702/// vfwadd_w -> vfwadd
15703/// vfwsub_w -> vfwsub
15704static SDValue combineOp_VLToVWOp_VL(SDNode *N,
15705 TargetLowering::DAGCombinerInfo &DCI,
15706 const RISCVSubtarget &Subtarget) {
15707 SelectionDAG &DAG = DCI.DAG;
15708 if (DCI.isBeforeLegalize())
15709 return SDValue();
15710
15711 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15712 return SDValue();
15713
15714 SmallVector<SDNode *> Worklist;
15715 SmallSet<SDNode *, 8> Inserted;
15716 Worklist.push_back(N);
15717 Inserted.insert(N);
15718 SmallVector<CombineResult> CombinesToApply;
15719
15720 while (!Worklist.empty()) {
15721 SDNode *Root = Worklist.pop_back_val();
15722
15723 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
15724 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
15725 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15726 &Inserted](const NodeExtensionHelper &Op) {
15727 if (Op.needToPromoteOtherUsers()) {
15728 for (SDUse &Use : Op.OrigOperand->uses()) {
15729 SDNode *TheUser = Use.getUser();
15730 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15731 return false;
15732 // We only support the first 2 operands of FMA.
15733 if (Use.getOperandNo() >= 2)
15734 return false;
15735 if (Inserted.insert(TheUser).second)
15736 Worklist.push_back(TheUser);
15737 }
15738 }
15739 return true;
15740 };
15741
15742 // Control the compile time by limiting the number of nodes we look at in
15743 // total.
15744 if (Inserted.size() > ExtensionMaxWebSize)
15745 return SDValue();
15746
15747 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
15748 NodeExtensionHelper::getSupportedFoldings(Root);
15749
15750 assert(!FoldingStrategies.empty() && "Nothing to be folded");
15751 bool Matched = false;
15752 for (int Attempt = 0;
15753 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15754 ++Attempt) {
15755
15756 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15757 FoldingStrategies) {
15758 std::optional<CombineResult> Res =
15759 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
15760 if (Res) {
15761 Matched = true;
15762 CombinesToApply.push_back(*Res);
15763 // All the inputs that are extended need to be folded, otherwise
15764 // we would be leaving the old input (since it may still be used),
15765 // and the new one.
15766 if (Res->LHSExt.has_value())
15767 if (!AppendUsersIfNeeded(LHS))
15768 return SDValue();
15769 if (Res->RHSExt.has_value())
15770 if (!AppendUsersIfNeeded(RHS))
15771 return SDValue();
15772 break;
15773 }
15774 }
15775 std::swap(LHS, RHS);
15776 }
15777 // Right now we do an all or nothing approach.
15778 if (!Matched)
15779 return SDValue();
15780 }
15781 // Store the value for the replacement of the input node separately.
15782 SDValue InputRootReplacement;
15783 // We do the RAUW after we materialize all the combines, because some replaced
15784 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
15785 // some of these nodes may appear in the NodeExtensionHelpers of some of the
15786 // yet-to-be-visited CombinesToApply roots.
15787 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
15788 ValuesToReplace.reserve(CombinesToApply.size());
15789 for (CombineResult Res : CombinesToApply) {
15790 SDValue NewValue = Res.materialize(DAG, Subtarget);
15791 if (!InputRootReplacement) {
15792 assert(Res.Root == N &&
15793 "First element is expected to be the current node");
15794 InputRootReplacement = NewValue;
15795 } else {
15796 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
15797 }
15798 }
15799 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15800 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
15801 DCI.AddToWorklist(OldNewValues.second.getNode());
15802 }
15803 return InputRootReplacement;
15804}
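// Rough shape of the rewrite performed above, e.g. for an add whose operands
// are both sign-extended from a narrower element type:
//   add (sext a), (sext b)  -->  vwadd.vv a, b
// The walk is all-or-nothing: every other user of a folded extend must itself
// be a supported root so it can be rewritten too, and the total web is capped
// at ExtensionMaxWebSize nodes.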
15805
15806// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
15807// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
15808// y will be the Passthru and cond will be the Mask.
15809static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
15810 unsigned Opc = N->getOpcode();
15811 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15812 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15813
15814 SDValue Y = N->getOperand(0);
15815 SDValue MergeOp = N->getOperand(1);
15816 unsigned MergeOpc = MergeOp.getOpcode();
15817
15818 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
15819 return SDValue();
15820
15821 SDValue X = MergeOp->getOperand(1);
15822
15823 if (!MergeOp.hasOneUse())
15824 return SDValue();
15825
15826 // Passthru should be undef
15827 SDValue Passthru = N->getOperand(2);
15828 if (!Passthru.isUndef())
15829 return SDValue();
15830
15831 // Mask should be all ones
15832 SDValue Mask = N->getOperand(3);
15833 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
15834 return SDValue();
15835
15836 // False value of MergeOp should be all zeros
15837 SDValue Z = MergeOp->getOperand(2);
15838
15839 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
15840 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
15841 Z = Z.getOperand(1);
15842
15843 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15844 return SDValue();
15845
15846 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
15847 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15848 N->getFlags());
15849}
15850
15851static SDValue performVWADDSUBW_VLCombine(SDNode *N,
15852 TargetLowering::DAGCombinerInfo &DCI,
15853 const RISCVSubtarget &Subtarget) {
15854 [[maybe_unused]] unsigned Opc = N->getOpcode();
15855 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
15856 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
15857
15858 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
15859 return V;
15860
15861 return combineVWADDSUBWSelect(N, DCI.DAG);
15862}
15863
15864// Helper function for performMemPairCombine.
15865// Try to combine the memory loads/stores LSNode1 and LSNode2
15866// into a single memory pair operation.
15867static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
15868 LSBaseSDNode *LSNode2, SDValue BasePtr,
15869 uint64_t Imm) {
15870 SmallPtrSet<const SDNode *, 32> Visited;
15871 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15872
15873 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15874 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15875 return SDValue();
15876
15877 MachineFunction &MF = DAG.getMachineFunction();
15878 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15879
15880 // The new operation has twice the width.
15881 MVT XLenVT = Subtarget.getXLenVT();
15882 EVT MemVT = LSNode1->getMemoryVT();
15883 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15884 MachineMemOperand *MMO = LSNode1->getMemOperand();
15885 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
15886 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15887
15888 if (LSNode1->getOpcode() == ISD::LOAD) {
15889 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15890 unsigned Opcode;
15891 if (MemVT == MVT::i32)
15892 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
15893 else
15894 Opcode = RISCVISD::TH_LDD;
15895
15896 SDValue Res = DAG.getMemIntrinsicNode(
15897 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15898 {LSNode1->getChain(), BasePtr,
15899 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15900 NewMemVT, NewMMO);
15901
15902 SDValue Node1 =
15903 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15904 SDValue Node2 =
15905 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15906
15907 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15908 return Node1;
15909 } else {
15910 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15911
15912 SDValue Res = DAG.getMemIntrinsicNode(
15913 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15914 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15915 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15916 NewMemVT, NewMMO);
15917
15918 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15919 return Res;
15920 }
15921}
15922
15923// Try to combine two adjacent loads/stores to a single pair instruction from
15924// the XTHeadMemPair vendor extension.
15925static SDValue performMemPairCombine(SDNode *N,
15926 TargetLowering::DAGCombinerInfo &DCI) {
15927 SelectionDAG &DAG = DCI.DAG;
15928 MachineFunction &MF = DAG.getMachineFunction();
15929 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15930
15931 // Target does not support load/store pair.
15932 if (!Subtarget.hasVendorXTHeadMemPair())
15933 return SDValue();
15934
15935 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
15936 EVT MemVT = LSNode1->getMemoryVT();
15937 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
15938
15939 // No volatile, indexed or atomic loads/stores.
15940 if (!LSNode1->isSimple() || LSNode1->isIndexed())
15941 return SDValue();
15942
15943 // Function to get a base + constant representation from a memory value.
15944 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
15945 if (Ptr->getOpcode() == ISD::ADD)
15946 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
15947 return {Ptr->getOperand(0), C1->getZExtValue()};
15948 return {Ptr, 0};
15949 };
15950
15951 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
15952
15953 SDValue Chain = N->getOperand(0);
15954 for (SDUse &Use : Chain->uses()) {
15955 if (Use.getUser() != N && Use.getResNo() == 0 &&
15956 Use.getUser()->getOpcode() == N->getOpcode()) {
15957 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
15958
15959 // No volatile, indexed or atomic loads/stores.
15960 if (!LSNode2->isSimple() || LSNode2->isIndexed())
15961 continue;
15962
15963 // Check if LSNode1 and LSNode2 have the same type and extension.
15964 if (LSNode1->getOpcode() == ISD::LOAD)
15965 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
15966 cast<LoadSDNode>(LSNode1)->getExtensionType())
15967 continue;
15968
15969 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
15970 continue;
15971
15972 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
15973
15974 // Check if the base pointer is the same for both instructions.
15975 if (Base1 != Base2)
15976 continue;
15977
15978 // Check if the offsets match the XTHeadMemPair encoding constraints.
15979 bool Valid = false;
15980 if (MemVT == MVT::i32) {
15981 // Check for adjacent i32 values and a 2-bit index.
15982 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
15983 Valid = true;
15984 } else if (MemVT == MVT::i64) {
15985 // Check for adjacent i64 values and a 2-bit index.
15986 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
15987 Valid = true;
15988 }
15989
15990 if (!Valid)
15991 continue;
15992
15993 // Try to combine.
15994 if (SDValue Res =
15995 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
15996 return Res;
15997 }
15998 }
15999
16000 return SDValue();
16001}
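// Illustrative example (XTHeadMemPair): with a common base p,
//   lw a, 8(p)
//   lw b, 12(p)
// satisfies Offset1 + 4 == Offset2 and isShiftedUInt<2, 3>(8), so the pair
// can be fused into a single th.lwd. Offsets such as 4(p)/8(p) fail the
// shifted-immediate check and are left alone.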
16002
16003// Fold
16004// (fp_to_int (froundeven X)) -> fcvt X, rne
16005// (fp_to_int (ftrunc X)) -> fcvt X, rtz
16006// (fp_to_int (ffloor X)) -> fcvt X, rdn
16007// (fp_to_int (fceil X)) -> fcvt X, rup
16008// (fp_to_int (fround X)) -> fcvt X, rmm
16009// (fp_to_int (frint X)) -> fcvt X
16010static SDValue performFP_TO_INTCombine(SDNode *N,
16011 TargetLowering::DAGCombinerInfo &DCI,
16012 const RISCVSubtarget &Subtarget) {
16013 SelectionDAG &DAG = DCI.DAG;
16014 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16015 MVT XLenVT = Subtarget.getXLenVT();
16016
16017 SDValue Src = N->getOperand(0);
16018
16019 // Don't do this for strict-fp Src.
16020 if (Src->isStrictFPOpcode())
16021 return SDValue();
16022
16023 // Ensure the FP type is legal.
16024 if (!TLI.isTypeLegal(Src.getValueType()))
16025 return SDValue();
16026
16027 // Don't do this for f16 with Zfhmin and not Zfh.
16028 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16029 return SDValue();
16030
16031 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16032 // If the result is invalid, we didn't find a foldable instruction.
16033 if (FRM == RISCVFPRndMode::Invalid)
16034 return SDValue();
16035
16036 SDLoc DL(N);
16037 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
16038 EVT VT = N->getValueType(0);
16039
16040 if (VT.isVector() && TLI.isTypeLegal(VT)) {
16041 MVT SrcVT = Src.getSimpleValueType();
16042 MVT SrcContainerVT = SrcVT;
16043 MVT ContainerVT = VT.getSimpleVT();
16044 SDValue XVal = Src.getOperand(0);
16045
16046 // For widening and narrowing conversions we just combine it into a
16047 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
16048 // end up getting lowered to their appropriate pseudo instructions based on
16049 // their operand types
16050 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
16051 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
16052 return SDValue();
16053
16054 // Make fixed-length vectors scalable first
16055 if (SrcVT.isFixedLengthVector()) {
16056 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
16057 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
16058 ContainerVT =
16059 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
16060 }
16061
16062 auto [Mask, VL] =
16063 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
16064
16065 SDValue FpToInt;
16066 if (FRM == RISCVFPRndMode::RTZ) {
16067 // Use the dedicated trunc static rounding mode if we're truncating so we
16068 // don't need to generate calls to fsrmi/fsrm
16069 unsigned Opc =
16070 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
16071 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
16072 } else {
16073 unsigned Opc =
16074 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
16075 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
16076 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
16077 }
16078
16079 // If converted from fixed-length to scalable, convert back
16080 if (VT.isFixedLengthVector())
16081 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
16082
16083 return FpToInt;
16084 }
16085
16086 // Only handle XLen or i32 types. Other types narrower than XLen will
16087 // eventually be legalized to XLenVT.
16088 if (VT != MVT::i32 && VT != XLenVT)
16089 return SDValue();
16090
16091 unsigned Opc;
16092 if (VT == XLenVT)
16093 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16094 else
16095 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16096
16097 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
16098 DAG.getTargetConstant(FRM, DL, XLenVT));
16099 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
16100}
16101
16102// Fold
16103// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
16104// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
16105// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
16106// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
16107// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
16108// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
16109static SDValue performFP_TO_INT_SATCombine(SDNode *N,
16110 TargetLowering::DAGCombinerInfo &DCI,
16111 const RISCVSubtarget &Subtarget) {
16112 SelectionDAG &DAG = DCI.DAG;
16113 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16114 MVT XLenVT = Subtarget.getXLenVT();
16115
16116 // Only handle XLen types. Other types narrower than XLen will eventually be
16117 // legalized to XLenVT.
16118 EVT DstVT = N->getValueType(0);
16119 if (DstVT != XLenVT)
16120 return SDValue();
16121
16122 SDValue Src = N->getOperand(0);
16123
16124 // Don't do this for strict-fp Src.
16125 if (Src->isStrictFPOpcode())
16126 return SDValue();
16127
16128 // Ensure the FP type is also legal.
16129 if (!TLI.isTypeLegal(Src.getValueType()))
16130 return SDValue();
16131
16132 // Don't do this for f16 with Zfhmin and not Zfh.
16133 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16134 return SDValue();
16135
16136 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16137
16138 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16139 if (FRM == RISCVFPRndMode::Invalid)
16140 return SDValue();
16141
16142 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
16143
16144 unsigned Opc;
16145 if (SatVT == DstVT)
16146 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16147 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
16148 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16149 else
16150 return SDValue();
16151 // FIXME: Support other SatVTs by clamping before or after the conversion.
16152
16153 Src = Src.getOperand(0);
16154
16155 SDLoc DL(N);
16156 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
16157 DAG.getTargetConstant(FRM, DL, XLenVT));
16158
16159 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
16160 // extend.
16161 if (Opc == RISCVISD::FCVT_WU_RV64)
16162 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
16163
16164 // RISC-V FP-to-int conversions saturate to the destination register size, but
16165 // don't produce 0 for nan.
16166 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
16167 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
16168}
16169
16170// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
16171// smaller than XLenVT.
16172static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
16173 const RISCVSubtarget &Subtarget) {
16174 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
16175
16176 SDValue Src = N->getOperand(0);
16177 if (Src.getOpcode() != ISD::BSWAP)
16178 return SDValue();
16179
16180 EVT VT = N->getValueType(0);
16181 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
16182 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
16183 return SDValue();
16184
16185 SDLoc DL(N);
16186 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
16187}
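// Why this is correct: on i16, for instance, bswap swaps the two bytes and
// bitreverse then reverses all 16 bits; the composition reverses the bits
// within each byte while keeping the bytes in place, which is exactly what
// brev8 computes.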
16188
16189// Convert from one FMA opcode to another based on whether we are negating the
16190// multiply result and/or the accumulator.
16191// NOTE: Only supports RVV operations with VL.
16192static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
16193 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
16194 if (NegMul) {
16195 // clang-format off
16196 switch (Opcode) {
16197 default: llvm_unreachable("Unexpected opcode");
16198 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16199 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16200 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16201 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16202 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
16203 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
16204 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
16205 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
16206 }
16207 // clang-format on
16208 }
16209
16210 // Negating the accumulator changes ADD<->SUB.
16211 if (NegAcc) {
16212 // clang-format off
16213 switch (Opcode) {
16214 default: llvm_unreachable("Unexpected opcode");
16215 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16216 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16217 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16218 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16219 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
16220 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
16221 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
16222 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
16223 }
16224 // clang-format on
16225 }
16226
16227 return Opcode;
16228}
16229
16230static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
16231 // Fold FNEG_VL into FMA opcodes.
16232 // The first operand of strict-fp is chain.
16233 bool IsStrict =
16234 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
16235 unsigned Offset = IsStrict ? 1 : 0;
16236 SDValue A = N->getOperand(0 + Offset);
16237 SDValue B = N->getOperand(1 + Offset);
16238 SDValue C = N->getOperand(2 + Offset);
16239 SDValue Mask = N->getOperand(3 + Offset);
16240 SDValue VL = N->getOperand(4 + Offset);
16241
16242 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
16243 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
16244 V.getOperand(2) == VL) {
16245 // Return the negated input.
16246 V = V.getOperand(0);
16247 return true;
16248 }
16249
16250 return false;
16251 };
16252
16253 bool NegA = invertIfNegative(A);
16254 bool NegB = invertIfNegative(B);
16255 bool NegC = invertIfNegative(C);
16256
16257 // If no operands are negated, we're done.
16258 if (!NegA && !NegB && !NegC)
16259 return SDValue();
16260
16261 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
16262 if (IsStrict)
16263 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
16264 {N->getOperand(0), A, B, C, Mask, VL});
16265 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
16266 VL);
16267}
16268
16269static SDValue performVFMADD_VLCombine(SDNode *N,
16270 TargetLowering::DAGCombinerInfo &DCI,
16271 const RISCVSubtarget &Subtarget) {
16272 SelectionDAG &DAG = DCI.DAG;
16273
16274 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
16275 return V;
16276
16277 // FIXME: Ignore strict opcodes for now.
16278 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
16279 return SDValue();
16280
16281 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16282}
16283
16284static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
16285 const RISCVSubtarget &Subtarget) {
16286 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
16287
16288 EVT VT = N->getValueType(0);
16289
16290 if (VT != Subtarget.getXLenVT())
16291 return SDValue();
16292
16293 if (!isa<ConstantSDNode>(N->getOperand(1)))
16294 return SDValue();
16295 uint64_t ShAmt = N->getConstantOperandVal(1);
16296
16297 SDValue N0 = N->getOperand(0);
16298
16299 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
16300 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
16301 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
16302 unsigned ExtSize =
16303 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
16304 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
16305 N0.getOperand(0).hasOneUse() &&
16306 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
16307 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
16308 if (LShAmt < ExtSize) {
16309 unsigned Size = VT.getSizeInBits();
16310 SDLoc ShlDL(N0.getOperand(0));
16311 SDValue Shl =
16312 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
16313 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
16314 SDLoc DL(N);
16315 return DAG.getNode(ISD::SRA, DL, VT, Shl,
16316 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
16317 }
16318 }
16319 }
16320
16321 if (ShAmt > 32 || VT != MVT::i64)
16322 return SDValue();
16323
16324 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
16325 // FIXME: Should this be a generic combine? There's a similar combine on X86.
16326 //
16327 // Also try these folds where an add or sub is in the middle.
16328 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
16329 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
16330 SDValue Shl;
16331 ConstantSDNode *AddC = nullptr;
16332
16333 // We might have an ADD or SUB between the SRA and SHL.
16334 bool IsAdd = N0.getOpcode() == ISD::ADD;
16335 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
16336 // Other operand needs to be a constant we can modify.
16337 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
16338 if (!AddC)
16339 return SDValue();
16340
16341 // AddC needs to have at least 32 trailing zeros.
16342 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
16343 return SDValue();
16344
16345 // All users should be a shift by constant less than or equal to 32. This
16346 // ensures we'll do this optimization for each of them to produce an
16347 // add/sub+sext_inreg they can all share.
16348 for (SDNode *U : N0->users()) {
16349 if (U->getOpcode() != ISD::SRA ||
16350 !isa<ConstantSDNode>(U->getOperand(1)) ||
16351 U->getConstantOperandVal(1) > 32)
16352 return SDValue();
16353 }
16354
16355 Shl = N0.getOperand(IsAdd ? 0 : 1);
16356 } else {
16357 // Not an ADD or SUB.
16358 Shl = N0;
16359 }
16360
16361 // Look for a shift left by 32.
16362 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
16363 Shl.getConstantOperandVal(1) != 32)
16364 return SDValue();
16365
16366 // If we didn't look through an add/sub, then the shl should have one use.
16367 // If we did look through an add/sub, the sext_inreg we create is free so
16368 // we're only creating 2 new instructions. It's enough to only remove the
16369 // original sra+add/sub.
16370 if (!AddC && !Shl.hasOneUse())
16371 return SDValue();
16372
16373 SDLoc DL(N);
16374 SDValue In = Shl.getOperand(0);
16375
16376 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
16377 // constant.
16378 if (AddC) {
16379 SDValue ShiftedAddC =
16380 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
16381 if (IsAdd)
16382 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
16383 else
16384 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
16385 }
16386
16387 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
16388 DAG.getValueType(MVT::i32));
16389 if (ShAmt == 32)
16390 return SExt;
16391
16392 return DAG.getNode(
16393 ISD::SHL, DL, MVT::i64, SExt,
16394 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
16395}
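// Worked example of the fold above: (sra (shl X, 32), 27) has ShAmt == 27,
// so it becomes (shl (sext_inreg X, i32), 32 - 27), i.e. sext.w followed by
// slli by 5. With an add in the middle, e.g.
//   (sra (add (shl X, 32), C1), 27) where C1 has at least 32 trailing zeros,
// the constant is pre-shifted: (shl (sext_inreg (add X, C1 >> 32), i32), 5).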
16396
16397// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
16398 // the result is used as the condition of a br_cc or select_cc we can invert,
16399// inverting the setcc is free, and Z is 0/1. Caller will invert the
16400// br_cc/select_cc.
16401static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
16402 bool IsAnd = Cond.getOpcode() == ISD::AND;
16403 if (!IsAnd && Cond.getOpcode() != ISD::OR)
16404 return SDValue();
16405
16406 if (!Cond.hasOneUse())
16407 return SDValue();
16408
16409 SDValue Setcc = Cond.getOperand(0);
16410 SDValue Xor = Cond.getOperand(1);
16411 // Canonicalize setcc to LHS.
16412 if (Setcc.getOpcode() != ISD::SETCC)
16413 std::swap(Setcc, Xor);
16414 // LHS should be a setcc and RHS should be an xor.
16415 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
16416 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
16417 return SDValue();
16418
16419 // If the condition is an And, SimplifyDemandedBits may have changed
16420 // (xor Z, 1) to (not Z).
16421 SDValue Xor1 = Xor.getOperand(1);
16422 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
16423 return SDValue();
16424
16425 EVT VT = Cond.getValueType();
16426 SDValue Xor0 = Xor.getOperand(0);
16427
16428 // The LHS of the xor needs to be 0/1.
16429 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
16430 if (!DAG.MaskedValueIsZero(Xor0, Mask))
16431 return SDValue();
16432
16433 // We can only invert integer setccs.
16434 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
16435 if (!SetCCOpVT.isScalarInteger())
16436 return SDValue();
16437
16438 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
16439 if (ISD::isIntEqualitySetCC(CCVal)) {
16440 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16441 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
16442 Setcc.getOperand(1), CCVal);
16443 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
16444 // Invert (setlt 0, X) by converting to (setlt X, 1).
16445 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
16446 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
16447 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
16448 // Invert (setlt X, 1) by converting to (setlt 0, X).
16449 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
16450 DAG.getConstant(0, SDLoc(Setcc), VT),
16451 Setcc.getOperand(0), CCVal);
16452 } else
16453 return SDValue();
16454
16455 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16456 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
16457}
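// Illustrative example: for Cond = (and (setcc X, Y, eq), (xor Z, 1)) with Z
// known to be 0 or 1, this returns (or (setcc X, Y, ne), Z); the caller then
// inverts the br_cc/select_cc condition, which is equivalent by De Morgan's
// laws.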
16458
16459 // Perform common combines for BR_CC and SELECT_CC conditions.
16460static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
16461 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
16462 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16463
16464 // Since an arithmetic right shift preserves the sign bit,
16465 // the shift can be omitted when comparing against zero.
16466 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
16467 // setge (sra X, N), 0 -> setge X, 0
16468 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
16469 LHS.getOpcode() == ISD::SRA) {
16470 LHS = LHS.getOperand(0);
16471 return true;
16472 }
16473
16474 if (!ISD::isIntEqualitySetCC(CCVal))
16475 return false;
16476
16477 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
16478 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
16479 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
16480 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
16481 // If we're looking for eq 0 instead of ne 0, we need to invert the
16482 // condition.
16483 bool Invert = CCVal == ISD::SETEQ;
16484 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
16485 if (Invert)
16486 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16487
16488 RHS = LHS.getOperand(1);
16489 LHS = LHS.getOperand(0);
16490 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
16491
16492 CC = DAG.getCondCode(CCVal);
16493 return true;
16494 }
16495
16496 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
16497 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
16498 RHS = LHS.getOperand(1);
16499 LHS = LHS.getOperand(0);
16500 return true;
16501 }
16502
16503 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
16504 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
16505 LHS.getOperand(1).getOpcode() == ISD::Constant) {
16506 SDValue LHS0 = LHS.getOperand(0);
16507 if (LHS0.getOpcode() == ISD::AND &&
16508 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
16509 uint64_t Mask = LHS0.getConstantOperandVal(1);
16510 uint64_t ShAmt = LHS.getConstantOperandVal(1);
16511 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
16512 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
16513 CC = DAG.getCondCode(CCVal);
16514
16515 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
16516 LHS = LHS0.getOperand(0);
16517 if (ShAmt != 0)
16518 LHS =
16519 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
16520 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
16521 return true;
16522 }
16523 }
16524 }
16525
16526 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
16527 // This can occur when legalizing some floating point comparisons.
16528 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
16529 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
16530 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16531 CC = DAG.getCondCode(CCVal);
16532 RHS = DAG.getConstant(0, DL, LHS.getValueType());
16533 return true;
16534 }
16535
16536 if (isNullConstant(RHS)) {
16537 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
16538 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16539 CC = DAG.getCondCode(CCVal);
16540 LHS = NewCond;
16541 return true;
16542 }
16543 }
16544
16545 return false;
16546}
16547
16548// Fold
16549// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
16550// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
16551// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
16552// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
16553static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
16554 SDValue TrueVal, SDValue FalseVal,
16555 bool Swapped) {
16556 bool Commutative = true;
16557 unsigned Opc = TrueVal.getOpcode();
16558 switch (Opc) {
16559 default:
16560 return SDValue();
16561 case ISD::SHL:
16562 case ISD::SRA:
16563 case ISD::SRL:
16564 case ISD::SUB:
16565 Commutative = false;
16566 break;
16567 case ISD::ADD:
16568 case ISD::OR:
16569 case ISD::XOR:
16570 break;
16571 }
16572
16573 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
16574 return SDValue();
16575
16576 unsigned OpToFold;
16577 if (FalseVal == TrueVal.getOperand(0))
16578 OpToFold = 0;
16579 else if (Commutative && FalseVal == TrueVal.getOperand(1))
16580 OpToFold = 1;
16581 else
16582 return SDValue();
16583
16584 EVT VT = N->getValueType(0);
16585 SDLoc DL(N);
16586 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
16587 EVT OtherOpVT = OtherOp.getValueType();
16588 SDValue IdentityOperand =
16589 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
16590 if (!Commutative)
16591 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
16592 assert(IdentityOperand && "No identity operand!");
16593
16594 if (Swapped)
16595 std::swap(OtherOp, IdentityOperand);
16596 SDValue NewSel =
16597 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
16598 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
16599}
16600
16601// This tries to get rid of `select` and `icmp` that are being used to handle
16602 // targets that do not support `cttz(0)`/`ctlz(0)`.
16603static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
16604 SDValue Cond = N->getOperand(0);
16605
16606 // This represents either CTTZ or CTLZ instruction.
16607 SDValue CountZeroes;
16608
16609 SDValue ValOnZero;
16610
16611 if (Cond.getOpcode() != ISD::SETCC)
16612 return SDValue();
16613
16614 if (!isNullConstant(Cond->getOperand(1)))
16615 return SDValue();
16616
16617 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
16618 if (CCVal == ISD::CondCode::SETEQ) {
16619 CountZeroes = N->getOperand(2);
16620 ValOnZero = N->getOperand(1);
16621 } else if (CCVal == ISD::CondCode::SETNE) {
16622 CountZeroes = N->getOperand(1);
16623 ValOnZero = N->getOperand(2);
16624 } else {
16625 return SDValue();
16626 }
16627
16628 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
16629 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
16630 CountZeroes = CountZeroes.getOperand(0);
16631
16632 if (CountZeroes.getOpcode() != ISD::CTTZ &&
16633 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
16634 CountZeroes.getOpcode() != ISD::CTLZ &&
16635 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
16636 return SDValue();
16637
16638 if (!isNullConstant(ValOnZero))
16639 return SDValue();
16640
16641 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
16642 if (Cond->getOperand(0) != CountZeroesArgument)
16643 return SDValue();
16644
16645 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
16646 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
16647 CountZeroes.getValueType(), CountZeroesArgument);
16648 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
16649 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
16650 CountZeroes.getValueType(), CountZeroesArgument);
16651 }
16652
16653 unsigned BitWidth = CountZeroes.getValueSizeInBits();
16654 SDValue BitWidthMinusOne =
16655 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
16656
16657 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
16658 CountZeroes, BitWidthMinusOne);
16659 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
16660}
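// Illustrative example on i32:
//   select (setcc X, 0, eq), 0, (cttz X)
// becomes (and (cttz X), 31): ISD::CTTZ yields 32 when X == 0, and
// 32 & 31 == 0, so the select becomes redundant. A CTTZ_ZERO_UNDEF operand
// is first rewritten to CTTZ so that the zero case is well defined.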
16661
16662static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
16663 const RISCVSubtarget &Subtarget) {
16664 SDValue Cond = N->getOperand(0);
16665 SDValue True = N->getOperand(1);
16666 SDValue False = N->getOperand(2);
16667 SDLoc DL(N);
16668 EVT VT = N->getValueType(0);
16669 EVT CondVT = Cond.getValueType();
16670
16671 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
16672 return SDValue();
16673
16674 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
16675 // BEXTI, where C is power of 2.
16676 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
16677 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
16678 SDValue LHS = Cond.getOperand(0);
16679 SDValue RHS = Cond.getOperand(1);
16680 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16681 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
16682 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
16683 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
16684 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
16685 return DAG.getSelect(DL, VT,
16686 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
16687 False, True);
16688 }
16689 }
16690 return SDValue();
16691}
16692
16693static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
16694 const RISCVSubtarget &Subtarget) {
16695 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
16696 return Folded;
16697
16698 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
16699 return V;
16700
16701 if (Subtarget.hasConditionalMoveFusion())
16702 return SDValue();
16703
16704 SDValue TrueVal = N->getOperand(1);
16705 SDValue FalseVal = N->getOperand(2);
16706 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
16707 return V;
16708 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
16709}
16710
16711/// If we have a build_vector where each lane is binop X, C, where C
16712/// is a constant (but not necessarily the same constant on all lanes),
16713/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
16714/// We assume that materializing a constant build vector will be no more
16715/// expensive than performing O(n) binops.
16716static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
16717 const RISCVSubtarget &Subtarget,
16718 const RISCVTargetLowering &TLI) {
16719 SDLoc DL(N);
16720 EVT VT = N->getValueType(0);
16721
16722 assert(!VT.isScalableVector() && "unexpected build vector");
16723
16724 if (VT.getVectorNumElements() == 1)
16725 return SDValue();
16726
16727 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
16728 if (!TLI.isBinOp(Opcode))
16729 return SDValue();
16730
16731 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
16732 return SDValue();
16733
16734 // This BUILD_VECTOR involves an implicit truncation, and sinking
16735 // truncates through binops is non-trivial.
16736 if (N->op_begin()->getValueType() != VT.getVectorElementType())
16737 return SDValue();
16738
16739 SmallVector<SDValue> LHSOps;
16740 SmallVector<SDValue> RHSOps;
16741 for (SDValue Op : N->ops()) {
16742 if (Op.isUndef()) {
16743 // We can't form a divide or remainder from undef.
16744 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
16745 return SDValue();
16746
16747 LHSOps.push_back(Op);
16748 RHSOps.push_back(Op);
16749 continue;
16750 }
16751
16752 // TODO: We can handle operations which have a neutral rhs value
16753 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
16754 // of profit in a more explicit manner.
16755 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
16756 return SDValue();
16757
16758 LHSOps.push_back(Op.getOperand(0));
16759 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
16760 !isa<ConstantFPSDNode>(Op.getOperand(1)))
16761 return SDValue();
16762 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16763 // have different LHS and RHS types.
16764 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
16765 return SDValue();
16766
16767 RHSOps.push_back(Op.getOperand(1));
16768 }
16769
16770 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
16771 DAG.getBuildVector(VT, DL, RHSOps));
16772}
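// Illustrative example:
//   build_vector (add a, 1), (add b, 2), (add c, 3), (add d, 4)
// becomes
//   add (build_vector a, b, c, d), (build_vector 1, 2, 3, 4)
// so one vector add and one materialized constant vector replace four scalar
// adds feeding the build_vector.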
16773
16774static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
16775 const RISCVSubtarget &Subtarget,
16776 const RISCVTargetLowering &TLI) {
16777 SDValue InVec = N->getOperand(0);
16778 SDValue InVal = N->getOperand(1);
16779 SDValue EltNo = N->getOperand(2);
16780 SDLoc DL(N);
16781
16782 EVT VT = InVec.getValueType();
16783 if (VT.isScalableVector())
16784 return SDValue();
16785
16786 if (!InVec.hasOneUse())
16787 return SDValue();
16788
16789 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
16790 // move the insert_vector_elts into the arms of the binop. Note that
16791 // the new RHS must be a constant.
16792 const unsigned InVecOpcode = InVec->getOpcode();
16793 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
16794 InVal.hasOneUse()) {
16795 SDValue InVecLHS = InVec->getOperand(0);
16796 SDValue InVecRHS = InVec->getOperand(1);
16797 SDValue InValLHS = InVal->getOperand(0);
16798 SDValue InValRHS = InVal->getOperand(1);
16799
16800 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
16801 return SDValue();
16802 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
16803 return SDValue();
16804 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16805 // have different LHS and RHS types.
16806 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
16807 return SDValue();
16808 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16809 InVecLHS, InValLHS, EltNo);
16810 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16811 InVecRHS, InValRHS, EltNo);
16812 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
16813 }
16814
16815 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
16816 // move the insert_vector_elt to the source operand of the concat_vector.
16817 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
16818 return SDValue();
16819
16820 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16821 if (!IndexC)
16822 return SDValue();
16823 unsigned Elt = IndexC->getZExtValue();
16824
16825 EVT ConcatVT = InVec.getOperand(0).getValueType();
16826 if (ConcatVT.getVectorElementType() != InVal.getValueType())
16827 return SDValue();
16828 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
16829 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
16830
16831 unsigned ConcatOpIdx = Elt / ConcatNumElts;
16832 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
16833 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
16834 ConcatOp, InVal, NewIdx);
16835
16836 SmallVector<SDValue> ConcatOps;
16837 ConcatOps.append(InVec->op_begin(), InVec->op_end());
16838 ConcatOps[ConcatOpIdx] = ConcatOp;
16839 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16840}
16841
16842// If we're concatenating a series of vector loads like
16843// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
16844// Then we can turn this into a strided load by widening the vector elements
16845// vlse32 p, stride=n
16846static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
16847 const RISCVSubtarget &Subtarget,
16848 const RISCVTargetLowering &TLI) {
16849 SDLoc DL(N);
16850 EVT VT = N->getValueType(0);
16851
16852 // Only perform this combine on legal MVTs.
16853 if (!TLI.isTypeLegal(VT))
16854 return SDValue();
16855
16856 // TODO: Potentially extend this to scalable vectors
16857 if (VT.isScalableVector())
16858 return SDValue();
16859
16860 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
16861 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
16862 !SDValue(BaseLd, 0).hasOneUse())
16863 return SDValue();
16864
16865 EVT BaseLdVT = BaseLd->getValueType(0);
16866
16867 // Go through the loads and check that they're strided
16868 SmallVector<LoadSDNode *> Lds;
16869 Lds.push_back(BaseLd);
16870 Align Align = BaseLd->getAlign();
16871 for (SDValue Op : N->ops().drop_front()) {
16872 auto *Ld = dyn_cast<LoadSDNode>(Op);
16873 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
16874 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
16875 Ld->getValueType(0) != BaseLdVT)
16876 return SDValue();
16877
16878 Lds.push_back(Ld);
16879
16880 // The common alignment is the most restrictive (smallest) of all the loads
16881 Align = std::min(Align, Ld->getAlign());
16882 }
16883
16884 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
16885 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
16886 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
16887 // If the load ptrs can be decomposed into a common (Base + Index) with a
16888 // common constant stride, then return the constant stride.
16889 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
16890 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
16891 if (BIO1.equalBaseIndex(BIO2, DAG))
16892 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
16893
16894 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
16895 SDValue P1 = Ld1->getBasePtr();
16896 SDValue P2 = Ld2->getBasePtr();
16897 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
16898 return {{P2.getOperand(1), false}};
16899 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
16900 return {{P1.getOperand(1), true}};
16901
16902 return std::nullopt;
16903 };
16904
16905 // Get the distance between the first and second loads
16906 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
16907 if (!BaseDiff)
16908 return SDValue();
16909
16910 // Check all the loads are the same distance apart
16911 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
16912 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
16913 return SDValue();
16914
16915 // TODO: At this point, we've successfully matched a generalized gather
16916 // load. Maybe we should emit that, and then move the specialized
16917 // matchers above and below into a DAG combine?
16918
16919 // Get the widened scalar type, e.g. v4i8 -> i64
16920 unsigned WideScalarBitWidth =
16921 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
16922 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
16923
16924 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
16925 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
16926 if (!TLI.isTypeLegal(WideVecVT))
16927 return SDValue();
16928
16929 // Check that the operation is legal
16930 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
16931 return SDValue();
16932
16933 auto [StrideVariant, MustNegateStride] = *BaseDiff;
16934 SDValue Stride =
16935 std::holds_alternative<SDValue>(StrideVariant)
16936 ? std::get<SDValue>(StrideVariant)
16937 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
16938 Lds[0]->getOffset().getValueType());
16939 if (MustNegateStride)
16940 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
16941
16942 SDValue AllOneMask =
16943 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
16944 DAG.getConstant(1, DL, MVT::i1));
16945
16946 uint64_t MemSize;
16947 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
16948 ConstStride && ConstStride->getSExtValue() >= 0)
16949 // total size = (elsize * n) + (stride - elsize) * (n-1)
16950 // = elsize + stride * (n-1)
16951 MemSize = WideScalarVT.getSizeInBits() +
16952 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
16953 else
16954 // If Stride isn't constant, then we can't know how much it will load
16955 MemSize = MemoryLocation::UnknownSize;
16956
16957 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
16958 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
16959 Align);
16960
16961 SDValue StridedLoad = DAG.getStridedLoadVP(
16962 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
16963 AllOneMask,
16964 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
16965
16966 for (SDValue Ld : N->ops())
16967 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
16968
16969 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
16970}
16971
16972static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
16973 const RISCVSubtarget &Subtarget) {
16974
16975 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
16976
16977 if (N->getValueType(0).isFixedLengthVector())
16978 return SDValue();
16979
16980 SDValue Addend = N->getOperand(0);
16981 SDValue MulOp = N->getOperand(1);
16982
16983 if (N->getOpcode() == RISCVISD::ADD_VL) {
16984 SDValue AddPassthruOp = N->getOperand(2);
16985 if (!AddPassthruOp.isUndef())
16986 return SDValue();
16987 }
16988
16989 auto IsVWMulOpc = [](unsigned Opc) {
16990 switch (Opc) {
16991 case RISCVISD::VWMUL_VL:
16992 case RISCVISD::VWMULU_VL:
16993 case RISCVISD::VWMULSU_VL:
16994 return true;
16995 default:
16996 return false;
16997 }
16998 };
16999
17000 if (!IsVWMulOpc(MulOp.getOpcode()))
17001 std::swap(Addend, MulOp);
17002
17003 if (!IsVWMulOpc(MulOp.getOpcode()))
17004 return SDValue();
17005
17006 SDValue MulPassthruOp = MulOp.getOperand(2);
17007
17008 if (!MulPassthruOp.isUndef())
17009 return SDValue();
17010
17011 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
17012 const RISCVSubtarget &Subtarget) {
17013 if (N->getOpcode() == ISD::ADD) {
17014 SDLoc DL(N);
17015 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
17016 Subtarget);
17017 }
17018 return std::make_pair(N->getOperand(3), N->getOperand(4));
17019 }(N, DAG, Subtarget);
17020
17021 SDValue MulMask = MulOp.getOperand(3);
17022 SDValue MulVL = MulOp.getOperand(4);
17023
17024 if (AddMask != MulMask || AddVL != MulVL)
17025 return SDValue();
17026
17027 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
17028 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
17029 "Unexpected opcode after VWMACC_VL");
17030 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
17031 "Unexpected opcode after VWMACC_VL!");
17032 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
17033 "Unexpected opcode after VWMUL_VL!");
17034 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
17035 "Unexpected opcode after VWMUL_VL!");
17036
17037 SDLoc DL(N);
17038 EVT VT = N->getValueType(0);
17039 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
17040 AddVL};
17041 return DAG.getNode(Opc, DL, VT, Ops);
17042}
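// Illustrative example:
//   ADD_VL a, (VWMUL_VL x, y, undef, m, vl), undef, m, vl
// becomes
//   VWMACC_VL x, y, a, m, vl
// i.e. the widening multiply folds into a vwmacc.vv accumulate; masks and
// VLs must match, and the VWMULU_VL/VWMULSU_VL forms map to
// VWMACCU_VL/VWMACCSU_VL via the opcode arithmetic above.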
17043
17044static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
17045 ISD::MemIndexType &IndexType,
17046 TargetLowering::DAGCombinerInfo &DCI) {
17047 if (!DCI.isBeforeLegalize())
17048 return false;
17049
17050 SelectionDAG &DAG = DCI.DAG;
17051 const MVT XLenVT =
17052 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
17053
17054 const EVT IndexVT = Index.getValueType();
17055
17056 // RISC-V indexed loads only support the "unsigned unscaled" addressing
17057 // mode, so anything else must be manually legalized.
17058 if (!isIndexTypeSigned(IndexType))
17059 return false;
17060
17061 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
17062 // Any index legalization should first promote to XLenVT, so we don't lose
17063 // bits when scaling. This may create an illegal index type so we let
17064 // LLVM's legalization take care of the splitting.
17065 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
17066 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
17067 IndexVT.changeVectorElementType(XLenVT), Index);
17068 }
17069 IndexType = ISD::UNSIGNED_SCALED;
17070 return true;
17071}
17072
17073/// Match the index vector of a scatter or gather node as the shuffle mask
17074/// which performs the rearrangement if possible. Will only match if
17075/// all lanes are touched, and thus replacing the scatter or gather with
17076/// a unit strided access and shuffle is legal.
17077static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
17078 SmallVector<int> &ShuffleMask) {
17079 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17080 return false;
17081 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17082 return false;
17083
17084 const unsigned ElementSize = VT.getScalarStoreSize();
17085 const unsigned NumElems = VT.getVectorNumElements();
17086
17087 // Create the shuffle mask and check all bits active
17088 assert(ShuffleMask.empty());
17089 BitVector ActiveLanes(NumElems);
17090 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17091 // TODO: We've found an active bit of UB, and could be
17092 // more aggressive here if desired.
17093 if (Index->getOperand(i)->isUndef())
17094 return false;
17095 uint64_t C = Index->getConstantOperandVal(i);
17096 if (C % ElementSize != 0)
17097 return false;
17098 C = C / ElementSize;
17099 if (C >= NumElems)
17100 return false;
17101 ShuffleMask.push_back(C);
17102 ActiveLanes.set(C);
17103 }
17104 return ActiveLanes.all();
17105}
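// Illustrative example: a gather of v4i32 with an all-ones mask and constant
// byte offsets [12, 8, 4, 0] divides down to lane indices [3, 2, 1, 0],
// touching every lane exactly once, so it can be rewritten as a unit-strided
// load followed by a reversing shuffle.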
17106
17107/// Match the index of a gather or scatter operation as an operation
17108/// with twice the element width and half the number of elements. This is
17109/// generally profitable (if legal) because these operations are linear
17110/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
17111/// come out ahead.
17112static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
17113 Align BaseAlign, const RISCVSubtarget &ST) {
17114 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17115 return false;
17116 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17117 return false;
17118
17119 // Attempt a doubling. If we can use an element type 4x or 8x in
17120 // size, this will happen via multiple iterations of the transform.
17121 const unsigned NumElems = VT.getVectorNumElements();
17122 if (NumElems % 2 != 0)
17123 return false;
17124
17125 const unsigned ElementSize = VT.getScalarStoreSize();
17126 const unsigned WiderElementSize = ElementSize * 2;
17127 if (WiderElementSize > ST.getELen()/8)
17128 return false;
17129
17130 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17131 return false;
17132
17133 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17134 // TODO: We've found an active bit of UB, and could be
17135 // more aggressive here if desired.
17136 if (Index->getOperand(i)->isUndef())
17137 return false;
17138 // TODO: This offset check is too strict if we support fully
17139 // misaligned memory operations.
17140 uint64_t C = Index->getConstantOperandVal(i);
17141 if (i % 2 == 0) {
17142 if (C % WiderElementSize != 0)
17143 return false;
17144 continue;
17145 }
17146 uint64_t Last = Index->getConstantOperandVal(i-1);
17147 if (C != Last + ElementSize)
17148 return false;
17149 }
17150 return true;
17151}
17152
17153// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
17154// This benefits cases where X and Y are both low-precision vectors of the
17155// same value type. Since the truncate would be lowered into n levels of
17156// TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such
17157// a pattern would otherwise be expanded into a series of "vsetvli" and
17158// "vnsrl" instructions later on.
17159static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
17160 SDValue Mask = N->getOperand(1);
17161 SDValue VL = N->getOperand(2);
17162
17163 bool IsVLMAX = isAllOnesConstant(VL) ||
17164 (isa<RegisterSDNode>(VL) &&
17165 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17166 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
17167 Mask.getOperand(0) != VL)
17168 return SDValue();
17169
17170 auto IsTruncNode = [&](SDValue V) {
17171 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17172 V.getOperand(1) == Mask && V.getOperand(2) == VL;
17173 };
17174
17175 SDValue Op = N->getOperand(0);
17176
17177 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
17178 // to distinguish such pattern.
17179 while (IsTruncNode(Op)) {
17180 if (!Op.hasOneUse())
17181 return SDValue();
17182 Op = Op.getOperand(0);
17183 }
17184
17185 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
17186 return SDValue();
17187
17188 SDValue N0 = Op.getOperand(0);
17189 SDValue N1 = Op.getOperand(1);
17190 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
17191 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
17192 return SDValue();
17193
17194 SDValue N00 = N0.getOperand(0);
17195 SDValue N10 = N1.getOperand(0);
17196 if (!N00.getValueType().isVector() ||
17197 N00.getValueType() != N10.getValueType() ||
17198 N->getValueType(0) != N10.getValueType())
17199 return SDValue();
17200
17201 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
17202 SDValue SMin =
17203 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
17204 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
17205 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
17206}
17207
17208// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
17209// maximum value for the truncated type.
17210// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
17211// is the signed maximum value for the truncated type and C2 is the signed
17212// minimum value.
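// For example (illustrative), truncating i16 elements to i8:
//   (trunc (umin X, 255))              -> (vnclipu X)
//   (trunc (smin (smax X, -128), 127)) -> (vnclip X)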
17213static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
17214 const RISCVSubtarget &Subtarget) {
17215 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
17216
17217 MVT VT = N->getSimpleValueType(0);
17218
17219 SDValue Mask = N->getOperand(1);
17220 SDValue VL = N->getOperand(2);
17221
17222 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
17223 APInt &SplatVal) {
17224 if (V.getOpcode() != Opc &&
17225 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17226 V.getOperand(3) == Mask && V.getOperand(4) == VL))
17227 return SDValue();
17228
17229 SDValue Op = V.getOperand(1);
17230
17231 // Peek through conversion between fixed and scalable vectors.
17232 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
17233 isNullConstant(Op.getOperand(2)) &&
17234 Op.getOperand(1).getValueType().isFixedLengthVector() &&
17235 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17236 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
17237 isNullConstant(Op.getOperand(1).getOperand(1)))
17238 Op = Op.getOperand(1).getOperand(0);
17239
17240 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
17241 return V.getOperand(0);
17242
17243 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
17244 Op.getOperand(2) == VL) {
17245 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
17246 SplatVal =
17247 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
17248 return V.getOperand(0);
17249 }
17250 }
17251
17252 return SDValue();
17253 };
17254
17255 SDLoc DL(N);
17256
17257 auto DetectUSatPattern = [&](SDValue V) {
17258 APInt LoC, HiC;
17259
17260 // Simple case, V is a UMIN.
17261 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
17262 if (HiC.isMask(VT.getScalarSizeInBits()))
17263 return UMinOp;
17264
17265 // If we have an SMAX that removes negative numbers first, then we can match
17266 // SMIN instead of UMIN.
17267 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17268 if (SDValue SMaxOp =
17269 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17270 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
17271 return SMinOp;
17272
17273 // If we have an SMIN before an SMAX and the SMAX constant is less than or
17274 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
17275 // first.
17276 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17277 if (SDValue SMinOp =
17278 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17279 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
17280 HiC.uge(LoC))
17281 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
17282 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
17283 Mask, VL);
17284
17285 return SDValue();
17286 };
17287
17288 auto DetectSSatPattern = [&](SDValue V) {
17289 unsigned NumDstBits = VT.getScalarSizeInBits();
17290 unsigned NumSrcBits = V.getScalarValueSizeInBits();
17291 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
17292 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
17293
17294 APInt HiC, LoC;
17295 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17296 if (SDValue SMaxOp =
17297 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17298 if (HiC == SignedMax && LoC == SignedMin)
17299 return SMaxOp;
17300
17301 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17302 if (SDValue SMinOp =
17303 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17304 if (HiC == SignedMax && LoC == SignedMin)
17305 return SMinOp;
17306
17307 return SDValue();
17308 };
17309
17310 SDValue Src = N->getOperand(0);
17311
17312 // Look through multiple layers of truncates.
17313 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17314 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17315 Src.hasOneUse())
17316 Src = Src.getOperand(0);
17317
17318 SDValue Val;
17319 unsigned ClipOpc;
17320 if ((Val = DetectUSatPattern(Src)))
17321 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
17322 else if ((Val = DetectSSatPattern(Src)))
17323 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
17324 else
17325 return SDValue();
17326
17327 MVT ValVT = Val.getSimpleValueType();
17328
17329 do {
17330 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
17331 ValVT = ValVT.changeVectorElementType(ValEltVT);
17332 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
17333 } while (ValVT != VT);
17334
17335 return Val;
17336}
17337
17338// Convert
17339// (iX ctpop (bitcast (vXi1 A)))
17340// ->
17341// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
17342// FIXME: It's complicated to match all the variations of this after type
17343// legalization so we only handle the pre-type legalization pattern, but that
17344// requires the fixed vector type to be legal.
17345static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
17346 const RISCVSubtarget &Subtarget) {
17347 EVT VT = N->getValueType(0);
17348 if (!VT.isScalarInteger())
17349 return SDValue();
17350
17351 SDValue Src = N->getOperand(0);
17352
17353 // Peek through zero_extend. It doesn't change the count.
17354 if (Src.getOpcode() == ISD::ZERO_EXTEND)
17355 Src = Src.getOperand(0);
17356
17357 if (Src.getOpcode() != ISD::BITCAST)
17358 return SDValue();
17359
17360 Src = Src.getOperand(0);
17361 EVT SrcEVT = Src.getValueType();
17362 if (!SrcEVT.isSimple())
17363 return SDValue();
17364
17365 MVT SrcMVT = SrcEVT.getSimpleVT();
17366 // Make sure the input is an i1 vector.
17367 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
17368 return SDValue();
17369
17370 if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
17371 return SDValue();
17372
17373 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
17374 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
17375
17376 SDLoc DL(N);
17377 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
17378
17379 MVT XLenVT = Subtarget.getXLenVT();
17380 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
17381 return DAG.getZExtOrTrunc(Pop, DL, VT);
17382}
17383
17384SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
17385 DAGCombinerInfo &DCI) const {
17386 SelectionDAG &DAG = DCI.DAG;
17387 const MVT XLenVT = Subtarget.getXLenVT();
17388 SDLoc DL(N);
17389
17390 // Helper to call SimplifyDemandedBits on an operand of N where only some low
17391 // bits are demanded. N will be added to the Worklist if it was not deleted.
17392 // Caller should return SDValue(N, 0) if this returns true.
17393 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
17394 SDValue Op = N->getOperand(OpNo);
17395 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
17396 if (!SimplifyDemandedBits(Op, Mask, DCI))
17397 return false;
17398
17399 if (N->getOpcode() != ISD::DELETED_NODE)
17400 DCI.AddToWorklist(N);
17401 return true;
17402 };
17403
17404 switch (N->getOpcode()) {
17405 default:
17406 break;
17407 case RISCVISD::SplitF64: {
17408 SDValue Op0 = N->getOperand(0);
17409 // If the input to SplitF64 is just BuildPairF64 then the operation is
17410 // redundant. Instead, use BuildPairF64's operands directly.
17411 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
17412 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
17413
17414 if (Op0->isUndef()) {
17415 SDValue Lo = DAG.getUNDEF(MVT::i32);
17416 SDValue Hi = DAG.getUNDEF(MVT::i32);
17417 return DCI.CombineTo(N, Lo, Hi);
17418 }
17419
17420 // It's cheaper to materialise two 32-bit integers than to load a double
17421 // from the constant pool and transfer it to integer registers through the
17422 // stack.
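// For example (illustrative): the f64 constant 1.0 has the bit pattern
// 0x3FF0000000000000, so Lo is 0x00000000 and Hi is 0x3FF00000, each of
// which can be materialised with a single instruction.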
17423 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
17424 APInt V = C->getValueAPF().bitcastToAPInt();
17425 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
17426 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
17427 return DCI.CombineTo(N, Lo, Hi);
17428 }
17429
17430 // This is a target-specific version of a DAGCombine performed in
17431 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17432 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17433 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17434 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17435 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
17436 break;
17437 SDValue NewSplitF64 =
17438 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
17439 Op0.getOperand(0));
17440 SDValue Lo = NewSplitF64.getValue(0);
17441 SDValue Hi = NewSplitF64.getValue(1);
17442 APInt SignBit = APInt::getSignMask(32);
17443 if (Op0.getOpcode() == ISD::FNEG) {
17444 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
17445 DAG.getConstant(SignBit, DL, MVT::i32));
17446 return DCI.CombineTo(N, Lo, NewHi);
17447 }
17448 assert(Op0.getOpcode() == ISD::FABS);
17449 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
17450 DAG.getConstant(~SignBit, DL, MVT::i32));
17451 return DCI.CombineTo(N, Lo, NewHi);
17452 }
17453 case RISCVISD::SLLW:
17454 case RISCVISD::SRAW:
17455 case RISCVISD::SRLW:
17456 case RISCVISD::RORW:
17457 case RISCVISD::ROLW: {
17458 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
17459 if (SimplifyDemandedLowBitsHelper(0, 32) ||
17460 SimplifyDemandedLowBitsHelper(1, 5))
17461 return SDValue(N, 0);
17462
17463 break;
17464 }
17465 case RISCVISD::CLZW:
17466 case RISCVISD::CTZW: {
17467 // Only the lower 32 bits of the first operand are read
17468 if (SimplifyDemandedLowBitsHelper(0, 32))
17469 return SDValue(N, 0);
17470 break;
17471 }
17472 case RISCVISD::FMV_W_X_RV64: {
17473 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
17474 // conversion is unnecessary and can be replaced with the
17475 // FMV_X_ANYEXTW_RV64 operand.
17476 SDValue Op0 = N->getOperand(0);
17477 if (Op0->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
17478 return Op0.getOperand(0);
17479 break;
17480 }
17481 case RISCVISD::FMV_X_ANYEXTH:
17482 case RISCVISD::FMV_X_ANYEXTW_RV64: {
17483 SDLoc DL(N);
17484 SDValue Op0 = N->getOperand(0);
17485 MVT VT = N->getSimpleValueType(0);
17486
17487 // Constant fold.
17488 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17489 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
17490 return DAG.getConstant(Val, DL, VT);
17491 }
17492
17493 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
17494 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
17495 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
17496 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
17497 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
17498 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
17499 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
17500 assert(Op0.getOperand(0).getValueType() == VT &&
17501 "Unexpected value type!");
17502 return Op0.getOperand(0);
17503 }
17504
17505 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
17506 cast<LoadSDNode>(Op0)->isSimple()) {
17507 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
17508 auto *LN0 = cast<LoadSDNode>(Op0);
17509 SDValue Load =
17510 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
17511 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17512 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
17513 return Load;
17514 }
17515
17516 // This is a target-specific version of a DAGCombine performed in
17517 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17518 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17519 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17520 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17521 !Op0.getNode()->hasOneUse())
17522 break;
17523 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
17524 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
17525 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
17526 if (Op0.getOpcode() == ISD::FNEG)
17527 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
17528 DAG.getConstant(SignBit, DL, VT));
17529
17530 assert(Op0.getOpcode() == ISD::FABS);
17531 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
17532 DAG.getConstant(~SignBit, DL, VT));
17533 }
17534 case ISD::ABS: {
17535 EVT VT = N->getValueType(0);
17536 SDValue N0 = N->getOperand(0);
17537 // abs (sext) -> zext (abs)
17538 // abs (zext) -> zext (handled elsewhere)
17539 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
17540 SDValue Src = N0.getOperand(0);
17541 SDLoc DL(N);
17542 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
17543 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
17544 }
17545 break;
17546 }
17547 case ISD::ADD: {
17548 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17549 return V;
17550 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
17551 return V;
17552 return performADDCombine(N, DCI, Subtarget);
17553 }
17554 case ISD::SUB: {
17555 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17556 return V;
17557 return performSUBCombine(N, DAG, Subtarget);
17558 }
17559 case ISD::AND:
17560 return performANDCombine(N, DCI, Subtarget);
17561 case ISD::OR: {
17562 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17563 return V;
17564 return performORCombine(N, DCI, Subtarget);
17565 }
17566 case ISD::XOR:
17567 return performXORCombine(N, DAG, Subtarget);
17568 case ISD::MUL:
17569 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17570 return V;
17571 return performMULCombine(N, DAG, DCI, Subtarget);
17572 case ISD::SDIV:
17573 case ISD::UDIV:
17574 case ISD::SREM:
17575 case ISD::UREM:
17576 if (SDValue V = combineBinOpOfZExt(N, DAG))
17577 return V;
17578 break;
17579 case ISD::FMUL: {
17580 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
17581 SDValue N0 = N->getOperand(0);
17582 SDValue N1 = N->getOperand(1);
17583 if (N0->getOpcode() != ISD::FCOPYSIGN)
17584 std::swap(N0, N1);
17585 if (N0->getOpcode() != ISD::FCOPYSIGN)
17586 return SDValue();
17587 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
17588 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
17589 return SDValue();
17590 EVT VT = N->getValueType(0);
17591 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
17592 return SDValue();
17593 SDValue Sign = N0->getOperand(1);
17594 if (Sign.getValueType() != VT)
17595 return SDValue();
17596 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
17597 }
17598 case ISD::FADD:
17599 case ISD::UMAX:
17600 case ISD::UMIN:
17601 case ISD::SMAX:
17602 case ISD::SMIN:
17603 case ISD::FMAXNUM:
17604 case ISD::FMINNUM: {
17605 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17606 return V;
17607 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17608 return V;
17609 return SDValue();
17610 }
17611 case ISD::SETCC:
17612 return performSETCCCombine(N, DAG, Subtarget);
17613 case ISD::SIGN_EXTEND_INREG:
17614 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
17615 case ISD::ZERO_EXTEND:
17616 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
17617 // type legalization. This is safe because fp_to_uint produces poison if
17618 // it overflows.
17619 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
17620 SDValue Src = N->getOperand(0);
17621 if (Src.getOpcode() == ISD::FP_TO_UINT &&
17622 isTypeLegal(Src.getOperand(0).getValueType()))
17623 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
17624 Src.getOperand(0));
17625 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
17626 isTypeLegal(Src.getOperand(1).getValueType())) {
17627 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
17628 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
17629 Src.getOperand(0), Src.getOperand(1));
17630 DCI.CombineTo(N, Res);
17631 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
17632 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
17633 return SDValue(N, 0); // Return N so it doesn't get rechecked.
17634 }
17635 }
17636 return SDValue();
17637 case RISCVISD::TRUNCATE_VECTOR_VL:
17638 if (SDValue V = combineTruncOfSraSext(N, DAG))
17639 return V;
17640 return combineTruncToVnclip(N, DAG, Subtarget);
17641 case ISD::TRUNCATE:
17642 return performTRUNCATECombine(N, DAG, Subtarget);
17643 case ISD::SELECT:
17644 return performSELECTCombine(N, DAG, Subtarget);
17645 case RISCVISD::CZERO_EQZ:
17646 case RISCVISD::CZERO_NEZ: {
17647 SDValue Val = N->getOperand(0);
17648 SDValue Cond = N->getOperand(1);
17649
17650 unsigned Opc = N->getOpcode();
17651
17652 // czero_eqz x, x -> x
17653 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
17654 return Val;
17655
17656 unsigned InvOpc =
17657 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
17658
17659 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
17660 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
17661 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
17662 SDValue NewCond = Cond.getOperand(0);
17663 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
17664 if (DAG.MaskedValueIsZero(NewCond, Mask))
17665 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
17666 }
17667 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
17668 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
17669 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
17670 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
17671 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
17672 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17673 if (ISD::isIntEqualitySetCC(CCVal))
17674 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
17675 N->getValueType(0), Val, Cond.getOperand(0));
17676 }
17677 return SDValue();
17678 }
17679 case RISCVISD::SELECT_CC: {
17680 // Transform
17681 SDValue LHS = N->getOperand(0);
17682 SDValue RHS = N->getOperand(1);
17683 SDValue CC = N->getOperand(2);
17684 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
17685 SDValue TrueV = N->getOperand(3);
17686 SDValue FalseV = N->getOperand(4);
17687 SDLoc DL(N);
17688 EVT VT = N->getValueType(0);
17689
17690 // If the True and False values are the same, we don't need a select_cc.
17691 if (TrueV == FalseV)
17692 return TrueV;
17693
17694 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
17695 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
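// For example (illustrative), on RV64 with y = 5 and z = 3: x >> 63 is
// all-ones when x < 0 and zero otherwise; AND-ing with (5 - 3) = 2 gives 2
// or 0, and adding 3 yields 5 or 3, matching the select.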
17696 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17697 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
17698 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
17699 if (CCVal == ISD::CondCode::SETGE)
17700 std::swap(TrueV, FalseV);
17701
17702 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17703 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17704 // Only handle simm12; if the constant is not in this range, it can be
17705 // treated as a register operand.
17706 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
17707 isInt<12>(TrueSImm - FalseSImm)) {
17708 SDValue SRA =
17709 DAG.getNode(ISD::SRA, DL, VT, LHS,
17710 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
17711 SDValue AND =
17712 DAG.getNode(ISD::AND, DL, VT, SRA,
17713 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
17714 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
17715 }
17716
17717 if (CCVal == ISD::CondCode::SETGE)
17718 std::swap(TrueV, FalseV);
17719 }
17720
17721 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17722 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
17723 {LHS, RHS, CC, TrueV, FalseV});
17724
17725 if (!Subtarget.hasConditionalMoveFusion()) {
17726 // (select c, -1, y) -> -c | y
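// For example (illustrative): when c is 1, -c is all-ones and the OR
// produces -1; when c is 0, the OR simply produces y.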
17727 if (isAllOnesConstant(TrueV)) {
17728 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17729 SDValue Neg = DAG.getNegative(C, DL, VT);
17730 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
17731 }
17732 // (select c, y, -1) -> -!c | y
17733 if (isAllOnesConstant(FalseV)) {
17734 SDValue C =
17735 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17736 SDValue Neg = DAG.getNegative(C, DL, VT);
17737 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
17738 }
17739
17740 // (select c, 0, y) -> -!c & y
17741 if (isNullConstant(TrueV)) {
17742 SDValue C =
17743 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17744 SDValue Neg = DAG.getNegative(C, DL, VT);
17745 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
17746 }
17747 // (select c, y, 0) -> -c & y
17748 if (isNullConstant(FalseV)) {
17749 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17750 SDValue Neg = DAG.getNegative(C, DL, VT);
17751 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
17752 }
17753 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
17754 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
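// For example (illustrative), for the first pattern: if x != 0 the select
// returns x and (setcc x, 0, eq) is 0; if x == 0 it returns 1 and the setcc
// is 1, so (add x, (setcc x, 0, eq)) matches both arms.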
17755 if (((isOneConstant(FalseV) && LHS == TrueV &&
17756 CCVal == ISD::CondCode::SETNE) ||
17757 (isOneConstant(TrueV) && LHS == FalseV &&
17758 CCVal == ISD::CondCode::SETEQ)) &&
17759 isNullConstant(RHS)) {
17760 // freeze it to be safe.
17761 LHS = DAG.getFreeze(LHS);
17762 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
17763 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
17764 }
17765 }
17766
17767 // If both true/false are an xor with 1, pull through the select.
17768 // This can occur after op legalization if both operands are setccs that
17769 // require an xor to invert.
17770 // FIXME: Generalize to other binary ops with identical operand?
17771 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
17772 TrueV.getOperand(1) == FalseV.getOperand(1) &&
17773 isOneConstant(TrueV.getOperand(1)) &&
17774 TrueV.hasOneUse() && FalseV.hasOneUse()) {
17775 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
17776 TrueV.getOperand(0), FalseV.getOperand(0));
17777 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
17778 }
17779
17780 return SDValue();
17781 }
17782 case RISCVISD::BR_CC: {
17783 SDValue LHS = N->getOperand(1);
17784 SDValue RHS = N->getOperand(2);
17785 SDValue CC = N->getOperand(3);
17786 SDLoc DL(N);
17787
17788 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17789 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
17790 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
17791
17792 return SDValue();
17793 }
17794 case ISD::BITREVERSE:
17795 return performBITREVERSECombine(N, DAG, Subtarget);
17796 case ISD::FP_TO_SINT:
17797 case ISD::FP_TO_UINT:
17798 return performFP_TO_INTCombine(N, DCI, Subtarget);
17799 case ISD::FP_TO_SINT_SAT:
17800 case ISD::FP_TO_UINT_SAT:
17801 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
17802 case ISD::FCOPYSIGN: {
17803 EVT VT = N->getValueType(0);
17804 if (!VT.isVector())
17805 break;
17806 // There is a form of VFSGNJ which injects the negated sign of its second
17807 // operand. Try and bubble any FNEG up after the extend/round to produce
17808 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
17809 // TRUNC=1.
17810 SDValue In2 = N->getOperand(1);
17811 // Avoid cases where the extend/round has multiple uses, as duplicating
17812 // those is typically more expensive than removing a fneg.
17813 if (!In2.hasOneUse())
17814 break;
17815 if (In2.getOpcode() != ISD::FP_EXTEND &&
17816 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
17817 break;
17818 In2 = In2.getOperand(0);
17819 if (In2.getOpcode() != ISD::FNEG)
17820 break;
17821 SDLoc DL(N);
17822 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
17823 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
17824 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
17825 }
17826 case ISD::MGATHER: {
17827 const auto *MGN = cast<MaskedGatherSDNode>(N);
17828 const EVT VT = N->getValueType(0);
17829 SDValue Index = MGN->getIndex();
17830 SDValue ScaleOp = MGN->getScale();
17831 ISD::MemIndexType IndexType = MGN->getIndexType();
17832 assert(!MGN->isIndexScaled() &&
17833 "Scaled gather/scatter should not be formed");
17834
17835 SDLoc DL(N);
17836 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17837 return DAG.getMaskedGather(
17838 N->getVTList(), MGN->getMemoryVT(), DL,
17839 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17840 MGN->getBasePtr(), Index, ScaleOp},
17841 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17842
17843 if (narrowIndex(Index, IndexType, DAG))
17844 return DAG.getMaskedGather(
17845 N->getVTList(), MGN->getMemoryVT(), DL,
17846 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
17847 MGN->getBasePtr(), Index, ScaleOp},
17848 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
17849
17850 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
17851 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
17852 // The sequence will be XLenVT, not the type of Index. Tell
17853 // isSimpleVIDSequence this so we avoid overflow.
17854 if (std::optional<VIDSequence> SimpleVID =
17855 isSimpleVIDSequence(Index, Subtarget.getXLen());
17856 SimpleVID && SimpleVID->StepDenominator == 1) {
17857 const int64_t StepNumerator = SimpleVID->StepNumerator;
17858 const int64_t Addend = SimpleVID->Addend;
17859
17860 // Note: We don't need to check alignment here since (by assumption
17861 // from the existence of the gather), our offsets must be sufficiently
17862 // aligned.
17863
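// For example (illustrative): a gather of i64 elements with constant index
// vector <0, 16, 32, 48> (byte offsets) is a VID sequence with step 16 and
// addend 0, so it becomes a strided VP load with stride 16 from the base
// pointer.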
17864 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
17865 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
17866 assert(IndexType == ISD::UNSIGNED_SCALED);
17867 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
17868 DAG.getSignedConstant(Addend, DL, PtrVT));
17869
17870 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
17872 SDValue StridedLoad = DAG.getStridedLoadVP(
17873 VT, DL, MGN->getChain(), BasePtr,
17874 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
17875 EVL, MGN->getMemOperand());
17876 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
17877 StridedLoad, MGN->getPassThru(), EVL);
17878 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
17879 DL);
17880 }
17881 }
17882
17883 SmallVector<int> ShuffleMask;
17884 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17885 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
17886 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
17887 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
17888 MGN->getMask(), DAG.getUNDEF(VT),
17889 MGN->getMemoryVT(), MGN->getMemOperand(),
17891 SDValue Shuffle =
17892 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
17893 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
17894 }
17895
17896 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
17897 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
17898 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
17899 SmallVector<SDValue> NewIndices;
17900 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
17901 NewIndices.push_back(Index.getOperand(i));
17902 EVT IndexVT = Index.getValueType()
17903 .getHalfNumVectorElementsVT(*DAG.getContext());
17904 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
17905
17906 unsigned ElementSize = VT.getScalarStoreSize();
17907 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
17908 auto EltCnt = VT.getVectorElementCount();
17909 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
17910 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
17911 EltCnt.divideCoefficientBy(2));
17912 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
17913 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
17914 EltCnt.divideCoefficientBy(2));
17915 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
17916
17917 SDValue Gather =
17918 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
17919 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
17920 Index, ScaleOp},
17921 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
17922 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
17923 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
17924 }
17925 break;
17926 }
17927 case ISD::MSCATTER:{
17928 const auto *MSN = cast<MaskedScatterSDNode>(N);
17929 SDValue Index = MSN->getIndex();
17930 SDValue ScaleOp = MSN->getScale();
17931 ISD::MemIndexType IndexType = MSN->getIndexType();
17932 assert(!MSN->isIndexScaled() &&
17933 "Scaled gather/scatter should not be formed");
17934
17935 SDLoc DL(N);
17936 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17937 return DAG.getMaskedScatter(
17938 N->getVTList(), MSN->getMemoryVT(), DL,
17939 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17940 Index, ScaleOp},
17941 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17942
17943 if (narrowIndex(Index, IndexType, DAG))
17944 return DAG.getMaskedScatter(
17945 N->getVTList(), MSN->getMemoryVT(), DL,
17946 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
17947 Index, ScaleOp},
17948 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
17949
17950 EVT VT = MSN->getValue()->getValueType(0);
17951 SmallVector<int> ShuffleMask;
17952 if (!MSN->isTruncatingStore() &&
17953 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
17954 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
17955 DAG.getUNDEF(VT), ShuffleMask);
17956 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
17957 DAG.getUNDEF(XLenVT), MSN->getMask(),
17958 MSN->getMemoryVT(), MSN->getMemOperand(),
17959 ISD::UNINDEXED, false);
17960 }
17961 break;
17962 }
17963 case ISD::VP_GATHER: {
17964 const auto *VPGN = cast<VPGatherSDNode>(N);
17965 SDValue Index = VPGN->getIndex();
17966 SDValue ScaleOp = VPGN->getScale();
17967 ISD::MemIndexType IndexType = VPGN->getIndexType();
17968 assert(!VPGN->isIndexScaled() &&
17969 "Scaled gather/scatter should not be formed");
17970
17971 SDLoc DL(N);
17972 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17973 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17974 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17975 ScaleOp, VPGN->getMask(),
17976 VPGN->getVectorLength()},
17977 VPGN->getMemOperand(), IndexType);
17978
17979 if (narrowIndex(Index, IndexType, DAG))
17980 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
17981 {VPGN->getChain(), VPGN->getBasePtr(), Index,
17982 ScaleOp, VPGN->getMask(),
17983 VPGN->getVectorLength()},
17984 VPGN->getMemOperand(), IndexType);
17985
17986 break;
17987 }
17988 case ISD::VP_SCATTER: {
17989 const auto *VPSN = cast<VPScatterSDNode>(N);
17990 SDValue Index = VPSN->getIndex();
17991 SDValue ScaleOp = VPSN->getScale();
17992 ISD::MemIndexType IndexType = VPSN->getIndexType();
17993 assert(!VPSN->isIndexScaled() &&
17994 "Scaled gather/scatter should not be formed");
17995
17996 SDLoc DL(N);
17997 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
17998 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
17999 {VPSN->getChain(), VPSN->getValue(),
18000 VPSN->getBasePtr(), Index, ScaleOp,
18001 VPSN->getMask(), VPSN->getVectorLength()},
18002 VPSN->getMemOperand(), IndexType);
18003
18004 if (narrowIndex(Index, IndexType, DAG))
18005 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18006 {VPSN->getChain(), VPSN->getValue(),
18007 VPSN->getBasePtr(), Index, ScaleOp,
18008 VPSN->getMask(), VPSN->getVectorLength()},
18009 VPSN->getMemOperand(), IndexType);
18010 break;
18011 }
18012 case RISCVISD::SHL_VL:
18013 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18014 return V;
18015 [[fallthrough]];
18016 case RISCVISD::SRA_VL:
18017 case RISCVISD::SRL_VL: {
18018 SDValue ShAmt = N->getOperand(1);
18020 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18021 SDLoc DL(N);
18022 SDValue VL = N->getOperand(4);
18023 EVT VT = N->getValueType(0);
18024 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18025 ShAmt.getOperand(1), VL);
18026 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
18027 N->getOperand(2), N->getOperand(3), N->getOperand(4));
18028 }
18029 break;
18030 }
18031 case ISD::SRA:
18032 if (SDValue V = performSRACombine(N, DAG, Subtarget))
18033 return V;
18034 [[fallthrough]];
18035 case ISD::SRL:
18036 case ISD::SHL: {
18037 if (N->getOpcode() == ISD::SHL) {
18038 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18039 return V;
18040 }
18041 SDValue ShAmt = N->getOperand(1);
18043 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18044 SDLoc DL(N);
18045 EVT VT = N->getValueType(0);
18046 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18047 ShAmt.getOperand(1),
18048 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
18049 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
18050 }
18051 break;
18052 }
18053 case RISCVISD::ADD_VL:
18054 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18055 return V;
18056 return combineToVWMACC(N, DAG, Subtarget);
18061 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
18062 case RISCVISD::SUB_VL:
18063 case RISCVISD::MUL_VL:
18064 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18073 return performVFMADD_VLCombine(N, DCI, Subtarget);
18074 case RISCVISD::FADD_VL:
18075 case RISCVISD::FSUB_VL:
18076 case RISCVISD::FMUL_VL:
18079 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18080 case ISD::LOAD:
18081 case ISD::STORE: {
18082 if (DCI.isAfterLegalizeDAG())
18083 if (SDValue V = performMemPairCombine(N, DCI))
18084 return V;
18085
18086 if (N->getOpcode() != ISD::STORE)
18087 break;
18088
18089 auto *Store = cast<StoreSDNode>(N);
18090 SDValue Chain = Store->getChain();
18091 EVT MemVT = Store->getMemoryVT();
18092 SDValue Val = Store->getValue();
18093 SDLoc DL(N);
18094
18095 bool IsScalarizable =
18096 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
18097 Store->isSimple() &&
18098 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
18099 isPowerOf2_64(MemVT.getSizeInBits()) &&
18100 MemVT.getSizeInBits() <= Subtarget.getXLen();
18101
18102 // If sufficiently aligned we can scalarize stores of constant vectors of
18103 // any power-of-two size up to XLen bits, provided that they aren't too
18104 // expensive to materialize.
18105 // vsetivli zero, 2, e8, m1, ta, ma
18106 // vmv.v.i v8, 4
18107 // vse64.v v8, (a0)
18108 // ->
18109 // li a1, 1028
18110 // sh a1, 0(a0)
18111 if (DCI.isBeforeLegalize() && IsScalarizable &&
18113 // Get the constant vector bits
18114 APInt NewC(Val.getValueSizeInBits(), 0);
18115 uint64_t EltSize = Val.getScalarValueSizeInBits();
18116 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
18117 if (Val.getOperand(i).isUndef())
18118 continue;
18119 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
18120 i * EltSize);
18121 }
18122 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18123
18124 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
18125 true) <= 2 &&
18127 NewVT, *Store->getMemOperand())) {
18128 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
18129 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
18130 Store->getPointerInfo(), Store->getOriginalAlign(),
18131 Store->getMemOperand()->getFlags());
18132 }
18133 }
18134
18135 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
18136 // vsetivli zero, 2, e16, m1, ta, ma
18137 // vle16.v v8, (a0)
18138 // vse16.v v8, (a1)
18139 if (auto *L = dyn_cast<LoadSDNode>(Val);
18140 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
18141 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18142 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
18143 L->getMemoryVT() == MemVT) {
18144 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18146 NewVT, *Store->getMemOperand()) &&
18148 NewVT, *L->getMemOperand())) {
18149 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
18150 L->getPointerInfo(), L->getOriginalAlign(),
18151 L->getMemOperand()->getFlags());
18152 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
18153 Store->getPointerInfo(), Store->getOriginalAlign(),
18154 Store->getMemOperand()->getFlags());
18155 }
18156 }
18157
18158 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
18159 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
18160 // any illegal types.
18161 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
18162 (DCI.isAfterLegalizeDAG() &&
18164 isNullConstant(Val.getOperand(1)))) {
18165 SDValue Src = Val.getOperand(0);
18166 MVT VecVT = Src.getSimpleValueType();
18167 // VecVT should be scalable and memory VT should match the element type.
18168 if (!Store->isIndexed() && VecVT.isScalableVector() &&
18169 MemVT == VecVT.getVectorElementType()) {
18170 SDLoc DL(N);
18171 MVT MaskVT = getMaskTypeFor(VecVT);
18172 return DAG.getStoreVP(
18173 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
18174 DAG.getConstant(1, DL, MaskVT),
18175 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
18176 Store->getMemOperand(), Store->getAddressingMode(),
18177 Store->isTruncatingStore(), /*IsCompress*/ false);
18178 }
18179 }
18180
18181 break;
18182 }
18183 case ISD::SPLAT_VECTOR: {
18184 EVT VT = N->getValueType(0);
18185 // Only perform this combine on legal MVT types.
18186 if (!isTypeLegal(VT))
18187 break;
18188 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
18189 DAG, Subtarget))
18190 return Gather;
18191 break;
18192 }
18193 case ISD::BUILD_VECTOR:
18194 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
18195 return V;
18196 break;
18197 case ISD::CONCAT_VECTORS:
18198 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
18199 return V;
18200 break;
18201 case ISD::INSERT_VECTOR_ELT:
18202 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
18203 return V;
18204 break;
18205 case RISCVISD::VFMV_V_F_VL: {
18206 const MVT VT = N->getSimpleValueType(0);
18207 SDValue Passthru = N->getOperand(0);
18208 SDValue Scalar = N->getOperand(1);
18209 SDValue VL = N->getOperand(2);
18210
18211 // If VL is 1, we can use vfmv.s.f.
18212 if (isOneConstant(VL))
18213 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
18214 break;
18215 }
18216 case RISCVISD::VMV_V_X_VL: {
18217 const MVT VT = N->getSimpleValueType(0);
18218 SDValue Passthru = N->getOperand(0);
18219 SDValue Scalar = N->getOperand(1);
18220 SDValue VL = N->getOperand(2);
18221
18222 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
18223 // scalar input.
18224 unsigned ScalarSize = Scalar.getValueSizeInBits();
18225 unsigned EltWidth = VT.getScalarSizeInBits();
18226 if (ScalarSize > EltWidth && Passthru.isUndef())
18227 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18228 return SDValue(N, 0);
18229
18230 // If VL is 1 and the scalar value won't benefit from immediate, we can
18231 // use vmv.s.x.
18232 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18233 if (isOneConstant(VL) &&
18234 (!Const || Const->isZero() ||
18235 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18236 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
18237
18238 break;
18239 }
18240 case RISCVISD::VFMV_S_F_VL: {
18241 SDValue Src = N->getOperand(1);
18242 // Try to remove vector->scalar->vector if the scalar->vector is inserting
18243 // into an undef vector.
18244 // TODO: Could use a vslide or vmv.v.v for non-undef.
18245 if (N->getOperand(0).isUndef() &&
18246 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18247 isNullConstant(Src.getOperand(1)) &&
18248 Src.getOperand(0).getValueType().isScalableVector()) {
18249 EVT VT = N->getValueType(0);
18250 EVT SrcVT = Src.getOperand(0).getValueType();
18252 // Widths match, just return the original vector.
18253 if (SrcVT == VT)
18254 return Src.getOperand(0);
18255 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
18256 }
18257 [[fallthrough]];
18258 }
18259 case RISCVISD::VMV_S_X_VL: {
18260 const MVT VT = N->getSimpleValueType(0);
18261 SDValue Passthru = N->getOperand(0);
18262 SDValue Scalar = N->getOperand(1);
18263 SDValue VL = N->getOperand(2);
18264
18265 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
18266 Scalar.getOperand(0).getValueType() == N->getValueType(0))
18267 return Scalar.getOperand(0);
18268
18269 // Use M1 or smaller to avoid over-constraining register allocation.
18270 const MVT M1VT = getLMUL1VT(VT);
18271 if (M1VT.bitsLT(VT)) {
18272 SDValue M1Passthru =
18273 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
18274 DAG.getVectorIdxConstant(0, DL));
18275 SDValue Result =
18276 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
18277 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
18278 DAG.getVectorIdxConstant(0, DL));
18279 return Result;
18280 }
18281
18282 // We use a vmv.v.i if possible. We limit this to LMUL1; LMUL2 or
18283 // higher would overly constrain the register allocator for no
18284 // purpose.
18285 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18286 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18287 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
18288 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
18289
18290 break;
18291 }
18292 case RISCVISD::VMV_X_S: {
18293 SDValue Vec = N->getOperand(0);
18294 MVT VecVT = N->getOperand(0).getSimpleValueType();
18295 const MVT M1VT = getLMUL1VT(VecVT);
18296 if (M1VT.bitsLT(VecVT)) {
18297 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
18298 DAG.getVectorIdxConstant(0, DL));
18299 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
18300 }
18301 break;
18302 }
18303 case ISD::INTRINSIC_VOID:
18304 case ISD::INTRINSIC_W_CHAIN:
18305 case ISD::INTRINSIC_WO_CHAIN: {
18306 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
18307 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
18308 switch (IntNo) {
18309 // By default we do not combine any intrinsic.
18310 default:
18311 return SDValue();
18312 case Intrinsic::riscv_vcpop:
18313 case Intrinsic::riscv_vcpop_mask:
18314 case Intrinsic::riscv_vfirst:
18315 case Intrinsic::riscv_vfirst_mask: {
18316 SDValue VL = N->getOperand(2);
18317 if (IntNo == Intrinsic::riscv_vcpop_mask ||
18318 IntNo == Intrinsic::riscv_vfirst_mask)
18319 VL = N->getOperand(3);
18320 if (!isNullConstant(VL))
18321 return SDValue();
18322 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
18323 SDLoc DL(N);
18324 EVT VT = N->getValueType(0);
18325 if (IntNo == Intrinsic::riscv_vfirst ||
18326 IntNo == Intrinsic::riscv_vfirst_mask)
18327 return DAG.getAllOnesConstant(DL, VT);
18328 return DAG.getConstant(0, DL, VT);
18329 }
18330 }
18331 }
18332 case ISD::BITCAST: {
18334 SDValue N0 = N->getOperand(0);
18335 EVT VT = N->getValueType(0);
18336 EVT SrcVT = N0.getValueType();
18337 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
18338 unsigned NF = VT.getRISCVVectorTupleNumFields();
18339 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
18340 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
18341 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
18342
18343 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
18344
18345 SDValue Result = DAG.getUNDEF(VT);
18346 for (unsigned i = 0; i < NF; ++i)
18347 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
18348 DAG.getVectorIdxConstant(i, DL));
18349 return Result;
18350 }
18351 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
18352 // type, widen both sides to avoid a trip through memory.
18353 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18354 VT.isScalarInteger()) {
18355 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
18356 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
18357 Ops[0] = N0;
18358 SDLoc DL(N);
18359 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
18360 N0 = DAG.getBitcast(MVT::i8, N0);
18361 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
18362 }
18363
18364 return SDValue();
18365 }
18366 case ISD::CTPOP:
18367 if (SDValue V = combineScalarCTPOPToVCPOP(N, DAG, Subtarget))
18368 return V;
18369 break;
18370 }
18371
18372 return SDValue();
18373}
18374
18375bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
18376 EVT XVT, unsigned KeptBits) const {
18377 // For vectors, we don't have a preference.
18378 if (XVT.isVector())
18379 return false;
18380
18381 if (XVT != MVT::i32 && XVT != MVT::i64)
18382 return false;
18383
18384 // We can use sext.w for RV64 or an srai 31 on RV32.
18385 if (KeptBits == 32 || KeptBits == 64)
18386 return true;
18387
18388 // With Zbb we can use sext.h/sext.b.
18389 return Subtarget.hasStdExtZbb() &&
18390 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
18391 KeptBits == 16);
18392}
18393
18394bool RISCVTargetLowering::isDesirableToCommuteWithShift(
18395 const SDNode *N, CombineLevel Level) const {
18396 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
18397 N->getOpcode() == ISD::SRL) &&
18398 "Expected shift op");
18399
18400 // The following folds are only desirable if `(OP _, c1 << c2)` can be
18401 // materialised in fewer instructions than `(OP _, c1)`:
18402 //
18403 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
18404 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
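// For example (illustrative): with c1 = 37 and c2 = 2, c1 << c2 = 148 still
// fits in an ADDI immediate, so the fold is allowed; with c1 = 2047 and
// c2 = 4, c1 << c2 = 32752 no longer fits simm12 while c1 does, so the fold
// is prevented.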
18405 SDValue N0 = N->getOperand(0);
18406 EVT Ty = N0.getValueType();
18407
18408 // LD/ST will optimize constant offset extraction, so when AddNode is used
18409 // by an LD/ST, the folding optimization performed above can still be
18410 // applied.
18411 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
18412 for (SDNode *Use : X->users()) {
18413 // This use is the one we're on right now. Skip it
18414 if (Use == User || Use->getOpcode() == ISD::SELECT)
18415 continue;
18416 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
18417 return false;
18418 }
18419 return true;
18420 };
18421
18422 if (Ty.isScalarInteger() &&
18423 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
18424 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
18425 return isUsedByLdSt(N0.getNode(), N);
18426
18427 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18428 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18429
18430 // Bail if we might break a sh{1,2,3}add pattern.
18431 if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
18432 C2->getZExtValue() <= 3 && N->hasOneUse() &&
18433 N->user_begin()->getOpcode() == ISD::ADD &&
18434 !isUsedByLdSt(*N->user_begin(), nullptr) &&
18435 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
18436 return false;
18437
18438 if (C1 && C2) {
18439 const APInt &C1Int = C1->getAPIntValue();
18440 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18441
18442 // We can materialise `c1 << c2` into an add immediate, so it's "free",
18443 // and the combine should happen, to potentially allow further combines
18444 // later.
18445 if (ShiftedC1Int.getSignificantBits() <= 64 &&
18446 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
18447 return true;
18448
18449 // We can materialise `c1` in an add immediate, so it's "free", and the
18450 // combine should be prevented.
18451 if (C1Int.getSignificantBits() <= 64 &&
18452 isLegalAddImmediate(C1Int.getSExtValue()))
18453 return false;
18454
18455 // Neither constant will fit into an immediate, so find materialisation
18456 // costs.
18457 int C1Cost =
18458 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
18459 /*CompressionCost*/ true);
18460 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
18461 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
18462 /*CompressionCost*/ true);
18463
18464 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
18465 // combine should be prevented.
18466 if (C1Cost < ShiftedC1Cost)
18467 return false;
18468 }
18469 }
18470
18471 if (!N0->hasOneUse())
18472 return false;
18473
18474 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
18475 N0->getOperand(0)->getOpcode() == ISD::ADD &&
18476 !N0->getOperand(0)->hasOneUse())
18477 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
18478
18479 return true;
18480}
18481
18482bool RISCVTargetLowering::targetShrinkDemandedConstant(
18483 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
18484 TargetLoweringOpt &TLO) const {
18485 // Delay this optimization as late as possible.
18486 if (!TLO.LegalOps)
18487 return false;
18488
18489 EVT VT = Op.getValueType();
18490 if (VT.isVector())
18491 return false;
18492
18493 unsigned Opcode = Op.getOpcode();
18494 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
18495 return false;
18496
18497 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18498 if (!C)
18499 return false;
18500
18501 const APInt &Mask = C->getAPIntValue();
18502
18503 // Clear all non-demanded bits initially.
18504 APInt ShrunkMask = Mask & DemandedBits;
18505
18506 // Try to make a smaller immediate by setting undemanded bits.
18507
18508 APInt ExpandedMask = Mask | ~DemandedBits;
18509
18510 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
18511 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
18512 };
18513 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
18514 if (NewMask == Mask)
18515 return true;
18516 SDLoc DL(Op);
18517 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
18518 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
18519 Op.getOperand(0), NewC);
18520 return TLO.CombineTo(Op, NewOp);
18521 };
18522
18523 // If the shrunk mask fits in sign extended 12 bits, let the target
18524 // independent code apply it.
18525 if (ShrunkMask.isSignedIntN(12))
18526 return false;
18527
18528 // The AND opcode has a few special cases for zext patterns.
18529 if (Opcode == ISD::AND) {
18530 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
18531 // otherwise use SLLI + SRLI.
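// For example (illustrative): an AND whose constant is 0x1ffff but where
// only the low 16 bits are demanded can use the mask 0xffff instead, which
// is matched by zext.h (or SLLI+SRLI) rather than a multi-instruction
// constant.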
18532 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
18533 if (IsLegalMask(NewMask))
18534 return UseMask(NewMask);
18535
18536 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
18537 if (VT == MVT::i64) {
18538 APInt NewMask = APInt(64, 0xffffffff);
18539 if (IsLegalMask(NewMask))
18540 return UseMask(NewMask);
18541 }
18542 }
18543
18544 // For the remaining optimizations, we need to be able to make a negative
18545 // number through a combination of mask and undemanded bits.
18546 if (!ExpandedMask.isNegative())
18547 return false;
18548
18549 // What is the fewest number of bits we need to represent the negative number.
18550 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
18551
18552 // Try to make a 12 bit negative immediate. If that fails try to make a 32
18553 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
18554 // If we can't create a simm12, we shouldn't change opaque constants.
18555 APInt NewMask = ShrunkMask;
18556 if (MinSignedBits <= 12)
18557 NewMask.setBitsFrom(11);
18558 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
18559 NewMask.setBitsFrom(31);
18560 else
18561 return false;
18562
18563 // Check that our new mask is a subset of the demanded mask.
18564 assert(IsLegalMask(NewMask));
18565 return UseMask(NewMask);
18566}
18567
18568static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
18569 static const uint64_t GREVMasks[] = {
18570 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
18571 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
18572
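// Illustrative note: with ShAmt == 7 the three low stages swap adjacent
// bits, 2-bit pairs, and nibbles within each byte, reversing the bits of
// every byte (brev8). With IsGORC each stage also ORs in the original
// value, so a byte becomes 0xff if any of its bits was set (orc.b).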
18573 for (unsigned Stage = 0; Stage != 6; ++Stage) {
18574 unsigned Shift = 1 << Stage;
18575 if (ShAmt & Shift) {
18576 uint64_t Mask = GREVMasks[Stage];
18577 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
18578 if (IsGORC)
18579 Res |= x;
18580 x = Res;
18581 }
18582 }
18583
18584 return x;
18585}
18586
18587void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
18588 KnownBits &Known,
18589 const APInt &DemandedElts,
18590 const SelectionDAG &DAG,
18591 unsigned Depth) const {
18592 unsigned BitWidth = Known.getBitWidth();
18593 unsigned Opc = Op.getOpcode();
18594 assert((Opc >= ISD::BUILTIN_OP_END ||
18595 Opc == ISD::INTRINSIC_WO_CHAIN ||
18596 Opc == ISD::INTRINSIC_W_CHAIN ||
18597 Opc == ISD::INTRINSIC_VOID) &&
18598 "Should use MaskedValueIsZero if you don't know whether Op"
18599 " is a target node!");
18600
18601 Known.resetAll();
18602 switch (Opc) {
18603 default: break;
18604 case RISCVISD::SELECT_CC: {
18605 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
18606 // If we don't know any bits, early out.
18607 if (Known.isUnknown())
18608 break;
18609 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
18610
18611 // Only known if known in both the LHS and RHS.
18612 Known = Known.intersectWith(Known2);
18613 break;
18614 }
18615 case RISCVISD::CZERO_EQZ:
18616 case RISCVISD::CZERO_NEZ:
18617 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18618 // Result is either all zero or operand 0. We can propagate zeros, but not
18619 // ones.
18620 Known.One.clearAllBits();
18621 break;
18622 case RISCVISD::REMUW: {
18623 KnownBits Known2;
18624 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18625 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18626 // We only care about the lower 32 bits.
18627 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
18628 // Restore the original width by sign extending.
18629 Known = Known.sext(BitWidth);
18630 break;
18631 }
18632 case RISCVISD::DIVUW: {
18633 KnownBits Known2;
18634 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18635 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18636 // We only care about the lower 32 bits.
18637 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
18638 // Restore the original width by sign extending.
18639 Known = Known.sext(BitWidth);
18640 break;
18641 }
18642 case RISCVISD::SLLW: {
18643 KnownBits Known2;
18644 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18645 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18646 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
18647 // Restore the original width by sign extending.
18648 Known = Known.sext(BitWidth);
18649 break;
18650 }
18651 case RISCVISD::CTZW: {
18652 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18653 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
18654 unsigned LowBits = llvm::bit_width(PossibleTZ);
18655 Known.Zero.setBitsFrom(LowBits);
18656 break;
18657 }
18658 case RISCVISD::CLZW: {
18659 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18660 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
18661 unsigned LowBits = llvm::bit_width(PossibleLZ);
18662 Known.Zero.setBitsFrom(LowBits);
18663 break;
18664 }
18665 case RISCVISD::BREV8:
18666 case RISCVISD::ORC_B: {
18667 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
18668 // control value of 7 is equivalent to brev8 and orc.b.
18669 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18670 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
18671 // To compute zeros, we need to invert the value and invert it back after.
18672 Known.Zero =
18673 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
18674 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
18675 break;
18676 }
18677 case RISCVISD::READ_VLENB: {
18678 // We can use the minimum and maximum VLEN values to bound VLENB. We
18679 // know VLEN must be a power of two.
18680 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
18681 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
18682 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
18683 Known.Zero.setLowBits(Log2_32(MinVLenB));
18684 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
18685 if (MaxVLenB == MinVLenB)
18686 Known.One.setBit(Log2_32(MinVLenB));
18687 break;
18688 }
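// For example, with a real VLEN range of [128, 65536] bits, VLENB is a power
// of two in [16, 8192]: the low 4 bits are known zero and so is everything
// above bit 13. If the minimum and maximum agree, the remaining bit is known
// one and VLENB is fully known.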
18689 case RISCVISD::FCLASS: {
18690 // fclass will only set one of the low 10 bits.
18691 Known.Zero.setBitsFrom(10);
18692 break;
18693 }
18694 case ISD::INTRINSIC_W_CHAIN:
18695 case ISD::INTRINSIC_WO_CHAIN: {
18696 unsigned IntNo =
18697 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
18698 switch (IntNo) {
18699 default:
18700 // We can't do anything for most intrinsics.
18701 break;
18702 case Intrinsic::riscv_vsetvli:
18703 case Intrinsic::riscv_vsetvlimax: {
18704 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
18705 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
18706 RISCVII::VLMUL VLMUL =
18707 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
18708 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
18709 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
18710 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
18711 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
18712
18713 // The result of vsetvli must not be larger than AVL.
18714 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
18715 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
18716
18717 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
18718 if (BitWidth > KnownZeroFirstBit)
18719 Known.Zero.setBitsFrom(KnownZeroFirstBit);
18720 break;
18721 }
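// For example, a vsetvli with SEW=32 and LMUL=2 on a core whose real maximum
// VLEN is 512 gives MaxVL = 512 / 32 * 2 = 32, so every bit above bit 5 of
// the returned VL is known zero (a constant AVL can tighten this further).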
18722 }
18723 break;
18724 }
18725 }
18726}
18727
18728 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
18729 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18730 unsigned Depth) const {
18731 switch (Op.getOpcode()) {
18732 default:
18733 break;
18734 case RISCVISD::SELECT_CC: {
18735 unsigned Tmp =
18736 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
18737 if (Tmp == 1) return 1; // Early out.
18738 unsigned Tmp2 =
18739 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
18740 return std::min(Tmp, Tmp2);
18741 }
18742 case RISCVISD::CZERO_EQZ:
18743 case RISCVISD::CZERO_NEZ:
18744 // Output is either all zero or operand 0. We can propagate sign bit count
18745 // from operand 0.
18746 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18747 case RISCVISD::ABSW: {
18748 // We expand this at isel to negw+max. The result will have 33 sign bits
18749 // if the input has at least 33 sign bits.
18750 unsigned Tmp =
18751 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18752 if (Tmp < 33) return 1;
18753 return 33;
18754 }
18755 case RISCVISD::SLLW:
18756 case RISCVISD::SRAW:
18757 case RISCVISD::SRLW:
18758 case RISCVISD::DIVW:
18759 case RISCVISD::DIVUW:
18760 case RISCVISD::REMUW:
18761 case RISCVISD::ROLW:
18762 case RISCVISD::RORW:
18763 case RISCVISD::FCVT_W_RV64:
18764 case RISCVISD::FCVT_WU_RV64:
18765 case RISCVISD::STRICT_FCVT_W_RV64:
18766 case RISCVISD::STRICT_FCVT_WU_RV64:
18767 // TODO: As the result is sign-extended, this is conservatively correct. A
18768 // more precise answer could be calculated for SRAW depending on known
18769 // bits in the shift amount.
18770 return 33;
18771 case RISCVISD::VMV_X_S: {
18772 // The number of sign bits of the scalar result is computed by obtaining the
18773 // element type of the input vector operand, subtracting its width from the
18774 // XLEN, and then adding one (sign bit within the element type). If the
18775 // element type is wider than XLen, the least-significant XLEN bits are
18776 // taken.
18777 unsigned XLen = Subtarget.getXLen();
18778 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
18779 if (EltBits <= XLen)
18780 return XLen - EltBits + 1;
18781 break;
18782 }
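// For example, vmv.x.s from a vector of i8 elements on RV64 produces a value
// sign-extended from 8 bits, i.e. 64 - 8 + 1 = 57 known sign bits.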
18783 case ISD::INTRINSIC_W_CHAIN: {
18784 unsigned IntNo = Op.getConstantOperandVal(1);
18785 switch (IntNo) {
18786 default:
18787 break;
18788 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
18789 case Intrinsic::riscv_masked_atomicrmw_add_i64:
18790 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
18791 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
18792 case Intrinsic::riscv_masked_atomicrmw_max_i64:
18793 case Intrinsic::riscv_masked_atomicrmw_min_i64:
18794 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
18795 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
18796 case Intrinsic::riscv_masked_cmpxchg_i64:
18797 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
18798 // narrow atomic operation. These are implemented using atomic
18799 // operations at the minimum supported atomicrmw/cmpxchg width whose
18800 // result is then sign extended to XLEN. With +A, the minimum width is
18801 // 32 for both RV64 and RV32.
18802 assert(Subtarget.getXLen() == 64);
18804 assert(Subtarget.hasStdExtA());
18805 return 33;
18806 }
18807 break;
18808 }
18809 }
18810
18811 return 1;
18812}
18813
18814 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
18815 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18816 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
18817
18818 // TODO: Add more target nodes.
18819 switch (Op.getOpcode()) {
18820 case RISCVISD::SELECT_CC:
18821 // Integer select_cc cannot create poison.
18822 // TODO: What are the FP poison semantics?
18823 // TODO: This instruction blocks poison from the unselected operand, can
18824 // we do anything with that?
18825 return !Op.getValueType().isInteger();
18826 }
18827 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
18828 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
18829}
18830
18831const Constant *
18832 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
18833 assert(Ld && "Unexpected null LoadSDNode");
18834 if (!ISD::isNormalLoad(Ld))
18835 return nullptr;
18836
18837 SDValue Ptr = Ld->getBasePtr();
18838
18839 // Only constant pools with no offset are supported.
18840 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
18841 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
18842 if (!CNode || CNode->isMachineConstantPoolEntry() ||
18843 CNode->getOffset() != 0)
18844 return nullptr;
18845
18846 return CNode;
18847 };
18848
18849 // Simple case, LLA.
18850 if (Ptr.getOpcode() == RISCVISD::LLA) {
18851 auto *CNode = GetSupportedConstantPool(Ptr);
18852 if (!CNode || CNode->getTargetFlags() != 0)
18853 return nullptr;
18854
18855 return CNode->getConstVal();
18856 }
18857
18858 // Look for a HI and ADD_LO pair.
18859 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
18860 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
18861 return nullptr;
18862
18863 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
18864 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
18865
18866 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
18867 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
18868 return nullptr;
18869
18870 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
18871 return nullptr;
18872
18873 return CNodeLo->getConstVal();
18874}
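// The two pointer shapes accepted above are, roughly:
//   (LLA constpool:sym)                          with no target flags, and
//   (ADD_LO (HI constpool:sym), constpool:sym)   with MO_HI/MO_LO flags,
// where both constant-pool operands of the HI/ADD_LO pair must refer to the
// same Constant.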
18875
18876 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
18877 MachineBasicBlock *BB) {
18878 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
18879
18880 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
18881 // Should the count have wrapped while it was being read, we need to try
18882 // again.
18883 // For example:
18884 // ```
18885 // read:
18886 // csrrs x3, counterh # load high word of counter
18887 // csrrs x2, counter # load low word of counter
18888 // csrrs x4, counterh # load high word of counter
18889 // bne x3, x4, read # check if high word reads match, otherwise try again
18890 // ```
18891
18892 MachineFunction &MF = *BB->getParent();
18893 const BasicBlock *LLVMBB = BB->getBasicBlock();
18894 MachineFunction::iterator It = ++BB->getIterator();
18895
18896 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
18897 MF.insert(It, LoopMBB);
18898
18899 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
18900 MF.insert(It, DoneMBB);
18901
18902 // Transfer the remainder of BB and its successor edges to DoneMBB.
18903 DoneMBB->splice(DoneMBB->begin(), BB,
18904 std::next(MachineBasicBlock::iterator(MI)), BB->end());
18905 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
18906
18907 BB->addSuccessor(LoopMBB);
18908
18910 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
18911 Register LoReg = MI.getOperand(0).getReg();
18912 Register HiReg = MI.getOperand(1).getReg();
18913 int64_t LoCounter = MI.getOperand(2).getImm();
18914 int64_t HiCounter = MI.getOperand(3).getImm();
18915 DebugLoc DL = MI.getDebugLoc();
18916
18917 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
18918 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
18919 .addImm(HiCounter)
18920 .addReg(RISCV::X0);
18921 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
18922 .addImm(LoCounter)
18923 .addReg(RISCV::X0);
18924 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
18925 .addImm(HiCounter)
18926 .addReg(RISCV::X0);
18927
18928 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
18929 .addReg(HiReg)
18930 .addReg(ReadAgainReg)
18931 .addMBB(LoopMBB);
18932
18933 LoopMBB->addSuccessor(LoopMBB);
18934 LoopMBB->addSuccessor(DoneMBB);
18935
18936 MI.eraseFromParent();
18937
18938 return DoneMBB;
18939}
18940
18941 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
18942 MachineBasicBlock *BB,
18943 const RISCVSubtarget &Subtarget) {
18944 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
18945
18946 MachineFunction &MF = *BB->getParent();
18947 DebugLoc DL = MI.getDebugLoc();
18948 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
18949 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
18950 Register LoReg = MI.getOperand(0).getReg();
18951 Register HiReg = MI.getOperand(1).getReg();
18952 Register SrcReg = MI.getOperand(2).getReg();
18953
18954 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
18955 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
18956
18957 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
18958 RI, Register());
18959 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
18960 MachineMemOperand *MMOLo =
18961 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
18962 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
18963 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
18964 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
18965 .addFrameIndex(FI)
18966 .addImm(0)
18967 .addMemOperand(MMOLo);
18968 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
18969 .addFrameIndex(FI)
18970 .addImm(4)
18971 .addMemOperand(MMOHi);
18972 MI.eraseFromParent(); // The pseudo instruction is gone now.
18973 return BB;
18974}
18975
18976 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
18977 MachineBasicBlock *BB,
18978 const RISCVSubtarget &Subtarget) {
18979 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
18980 "Unexpected instruction");
18981
18982 MachineFunction &MF = *BB->getParent();
18983 DebugLoc DL = MI.getDebugLoc();
18984 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
18985 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
18986 Register DstReg = MI.getOperand(0).getReg();
18987 Register LoReg = MI.getOperand(1).getReg();
18988 Register HiReg = MI.getOperand(2).getReg();
18989
18990 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
18991 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
18992
18993 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
18994 MachineMemOperand *MMOLo =
18995 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
18996 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
18997 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
18998 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
18999 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
19000 .addFrameIndex(FI)
19001 .addImm(0)
19002 .addMemOperand(MMOLo);
19003 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19004 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
19005 .addFrameIndex(FI)
19006 .addImm(4)
19007 .addMemOperand(MMOHi);
19008 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
19009 MI.eraseFromParent(); // The pseudo instruction is gone now.
19010 return BB;
19011}
19012
19013 static bool isSelectPseudo(MachineInstr &MI) {
19014 switch (MI.getOpcode()) {
19015 default:
19016 return false;
19017 case RISCV::Select_GPR_Using_CC_GPR:
19018 case RISCV::Select_GPR_Using_CC_Imm:
19019 case RISCV::Select_FPR16_Using_CC_GPR:
19020 case RISCV::Select_FPR16INX_Using_CC_GPR:
19021 case RISCV::Select_FPR32_Using_CC_GPR:
19022 case RISCV::Select_FPR32INX_Using_CC_GPR:
19023 case RISCV::Select_FPR64_Using_CC_GPR:
19024 case RISCV::Select_FPR64INX_Using_CC_GPR:
19025 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19026 return true;
19027 }
19028}
19029
19030 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
19031 unsigned RelOpcode, unsigned EqOpcode,
19032 const RISCVSubtarget &Subtarget) {
19033 DebugLoc DL = MI.getDebugLoc();
19034 Register DstReg = MI.getOperand(0).getReg();
19035 Register Src1Reg = MI.getOperand(1).getReg();
19036 Register Src2Reg = MI.getOperand(2).getReg();
19037 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
19038 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19039 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
19040
19041 // Save the current FFLAGS.
19042 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
19043
19044 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
19045 .addReg(Src1Reg)
19046 .addReg(Src2Reg);
19047 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19048 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19049
19050 // Restore the FFLAGS.
19051 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19052 .addReg(SavedFFlags, RegState::Kill);
19053
19054 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
19055 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
19056 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
19057 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
19058 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19059 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
19060
19061 // Erase the pseudoinstruction.
19062 MI.eraseFromParent();
19063 return BB;
19064}
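// Roughly, a quiet FLT_S pseudo expands to (register names illustrative):
//   frflags t0             # save the accrued exception flags
//   flt.s   a0, fa0, fa1   # signaling compare; may set the invalid flag
//   fsflags t0             # restore the saved flags
//   feq.s   x0, fa0, fa1   # quiet compare; raises invalid only for
//                          # signaling NaN operands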
19065
19066static MachineBasicBlock *
19067 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
19068 MachineBasicBlock *ThisMBB,
19069 const RISCVSubtarget &Subtarget) {
19070 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
19071 // Without this, custom-inserter would have generated:
19072 //
19073 // A
19074 // | \
19075 // | B
19076 // | /
19077 // C
19078 // | \
19079 // | D
19080 // | /
19081 // E
19082 //
19083 // A: X = ...; Y = ...
19084 // B: empty
19085 // C: Z = PHI [X, A], [Y, B]
19086 // D: empty
19087 // E: PHI [X, C], [Z, D]
19088 //
19089 // If we lower both Select_FPRX_ in a single step, we can instead generate:
19090 //
19091 // A
19092 // | \
19093 // | C
19094 // | /|
19095 // |/ |
19096 // | |
19097 // | D
19098 // | /
19099 // E
19100 //
19101 // A: X = ...; Y = ...
19102 // D: empty
19103 // E: PHI [X, A], [X, C], [Y, D]
19104
19105 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19106 const DebugLoc &DL = First.getDebugLoc();
19107 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
19108 MachineFunction *F = ThisMBB->getParent();
19109 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
19110 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
19111 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
19112 MachineFunction::iterator It = ++ThisMBB->getIterator();
19113 F->insert(It, FirstMBB);
19114 F->insert(It, SecondMBB);
19115 F->insert(It, SinkMBB);
19116
19117 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
19118 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
19119 std::next(MachineBasicBlock::iterator(First)),
19120 ThisMBB->end());
19121 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
19122
19123 // Fallthrough block for ThisMBB.
19124 ThisMBB->addSuccessor(FirstMBB);
19125 // Fallthrough block for FirstMBB.
19126 FirstMBB->addSuccessor(SecondMBB);
19127 ThisMBB->addSuccessor(SinkMBB);
19128 FirstMBB->addSuccessor(SinkMBB);
19129 // This is fallthrough.
19130 SecondMBB->addSuccessor(SinkMBB);
19131
19132 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
19133 Register FLHS = First.getOperand(1).getReg();
19134 Register FRHS = First.getOperand(2).getReg();
19135 // Insert appropriate branch.
19136 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
19137 .addReg(FLHS)
19138 .addReg(FRHS)
19139 .addMBB(SinkMBB);
19140
19141 Register SLHS = Second.getOperand(1).getReg();
19142 Register SRHS = Second.getOperand(2).getReg();
19143 Register Op1Reg4 = First.getOperand(4).getReg();
19144 Register Op1Reg5 = First.getOperand(5).getReg();
19145
19146 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
19147 // Insert appropriate branch.
19148 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
19149 .addReg(SLHS)
19150 .addReg(SRHS)
19151 .addMBB(SinkMBB);
19152
19153 Register DestReg = Second.getOperand(0).getReg();
19154 Register Op2Reg4 = Second.getOperand(4).getReg();
19155 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
19156 .addReg(Op2Reg4)
19157 .addMBB(ThisMBB)
19158 .addReg(Op1Reg4)
19159 .addMBB(FirstMBB)
19160 .addReg(Op1Reg5)
19161 .addMBB(SecondMBB);
19162
19163 // Now remove the Select_FPRX_s.
19164 First.eraseFromParent();
19165 Second.eraseFromParent();
19166 return SinkMBB;
19167}
19168
19169 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
19170 MachineBasicBlock *BB,
19171 const RISCVSubtarget &Subtarget) {
19172 // To "insert" Select_* instructions, we actually have to insert the triangle
19173 // control-flow pattern. The incoming instructions know the destination vreg
19174 // to set, the condition code register to branch on, the true/false values to
19175 // select between, and the condcode to use to select the appropriate branch.
19176 //
19177 // We produce the following control flow:
19178 // HeadMBB
19179 // | \
19180 // | IfFalseMBB
19181 // | /
19182 // TailMBB
19183 //
19184 // When we find a sequence of selects we attempt to optimize their emission
19185 // by sharing the control flow. Currently we only handle cases where we have
19186 // multiple selects with the exact same condition (same LHS, RHS and CC).
19187 // The selects may be interleaved with other instructions if the other
19188 // instructions meet some requirements we deem safe:
19189 // - They are not pseudo instructions.
19190 // - They are debug instructions. Otherwise,
19191 // - They do not have side-effects, do not access memory and their inputs do
19192 // not depend on the results of the select pseudo-instructions.
19193 // The TrueV/FalseV operands of the selects cannot depend on the result of
19194 // previous selects in the sequence.
19195 // These conditions could be further relaxed. See the X86 target for a
19196 // related approach and more information.
19197 //
19198 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19199 // is checked here and handled by a separate function -
19200 // EmitLoweredCascadedSelect.
19201
19202 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
19203 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
19204 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
19205 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
19206 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
19207 Next->getOperand(5).isKill())
19208 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
19209
19210 Register LHS = MI.getOperand(1).getReg();
19211 Register RHS;
19212 if (MI.getOperand(2).isReg())
19213 RHS = MI.getOperand(2).getReg();
19214 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
19215
19216 SmallVector<MachineInstr *, 4> SelectDebugValues;
19217 SmallSet<Register, 4> SelectDests;
19218 SelectDests.insert(MI.getOperand(0).getReg());
19219
19220 MachineInstr *LastSelectPseudo = &MI;
19221 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
19222 SequenceMBBI != E; ++SequenceMBBI) {
19223 if (SequenceMBBI->isDebugInstr())
19224 continue;
19225 if (isSelectPseudo(*SequenceMBBI)) {
19226 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
19227 !SequenceMBBI->getOperand(2).isReg() ||
19228 SequenceMBBI->getOperand(2).getReg() != RHS ||
19229 SequenceMBBI->getOperand(3).getImm() != CC ||
19230 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
19231 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
19232 break;
19233 LastSelectPseudo = &*SequenceMBBI;
19234 SequenceMBBI->collectDebugValues(SelectDebugValues);
19235 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
19236 continue;
19237 }
19238 if (SequenceMBBI->hasUnmodeledSideEffects() ||
19239 SequenceMBBI->mayLoadOrStore() ||
19240 SequenceMBBI->usesCustomInsertionHook())
19241 break;
19242 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
19243 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
19244 }))
19245 break;
19246 }
19247
19248 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19249 const BasicBlock *LLVM_BB = BB->getBasicBlock();
19250 DebugLoc DL = MI.getDebugLoc();
19251 MachineFunction::iterator I = ++BB->getIterator();
19252
19253 MachineBasicBlock *HeadMBB = BB;
19254 MachineFunction *F = BB->getParent();
19255 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
19256 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
19257
19258 F->insert(I, IfFalseMBB);
19259 F->insert(I, TailMBB);
19260
19261 // Set the call frame size on entry to the new basic blocks.
19262 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
19263 IfFalseMBB->setCallFrameSize(CallFrameSize);
19264 TailMBB->setCallFrameSize(CallFrameSize);
19265
19266 // Transfer debug instructions associated with the selects to TailMBB.
19267 for (MachineInstr *DebugInstr : SelectDebugValues) {
19268 TailMBB->push_back(DebugInstr->removeFromParent());
19269 }
19270
19271 // Move all instructions after the sequence to TailMBB.
19272 TailMBB->splice(TailMBB->end(), HeadMBB,
19273 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
19274 // Update machine-CFG edges by transferring all successors of the current
19275 // block to the new block which will contain the Phi nodes for the selects.
19276 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
19277 // Set the successors for HeadMBB.
19278 HeadMBB->addSuccessor(IfFalseMBB);
19279 HeadMBB->addSuccessor(TailMBB);
19280
19281 // Insert appropriate branch.
19282 if (MI.getOperand(2).isImm())
19283 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
19284 .addReg(LHS)
19285 .addImm(MI.getOperand(2).getImm())
19286 .addMBB(TailMBB);
19287 else
19288 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
19289 .addReg(LHS)
19290 .addReg(RHS)
19291 .addMBB(TailMBB);
19292
19293 // IfFalseMBB just falls through to TailMBB.
19294 IfFalseMBB->addSuccessor(TailMBB);
19295
19296 // Create PHIs for all of the select pseudo-instructions.
19297 auto SelectMBBI = MI.getIterator();
19298 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
19299 auto InsertionPoint = TailMBB->begin();
19300 while (SelectMBBI != SelectEnd) {
19301 auto Next = std::next(SelectMBBI);
19302 if (isSelectPseudo(*SelectMBBI)) {
19303 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
19304 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
19305 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
19306 .addReg(SelectMBBI->getOperand(4).getReg())
19307 .addMBB(HeadMBB)
19308 .addReg(SelectMBBI->getOperand(5).getReg())
19309 .addMBB(IfFalseMBB);
19310 SelectMBBI->eraseFromParent();
19311 }
19312 SelectMBBI = Next;
19313 }
19314
19315 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
19316 return TailMBB;
19317}
19318
19319// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
19320static const RISCV::RISCVMaskedPseudoInfo *
19321lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
19322 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
19323 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
19324 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
19325 const RISCV::RISCVMaskedPseudoInfo *Masked =
19326 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
19327 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
19328 return Masked;
19329}
19330
19331 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
19332 MachineBasicBlock *BB,
19333 unsigned CVTXOpc) {
19334 DebugLoc DL = MI.getDebugLoc();
19335
19336 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
19337
19338 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
19339 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19340
19341 // Save the old value of FFLAGS.
19342 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
19343
19344 assert(MI.getNumOperands() == 7);
19345
19346 // Emit a VFCVT_X_F
19347 const TargetRegisterInfo *TRI =
19348 BB->getParent()->getSubtarget().getRegisterInfo();
19349 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
19350 Register Tmp = MRI.createVirtualRegister(RC);
19351 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
19352 .add(MI.getOperand(1))
19353 .add(MI.getOperand(2))
19354 .add(MI.getOperand(3))
19355 .add(MachineOperand::CreateImm(7)) // frm = DYN
19356 .add(MI.getOperand(4))
19357 .add(MI.getOperand(5))
19358 .add(MI.getOperand(6))
19359 .add(MachineOperand::CreateReg(RISCV::FRM,
19360 /*IsDef*/ false,
19361 /*IsImp*/ true));
19362
19363 // Emit a VFCVT_F_X
19364 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
19365 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
19366 // There is no E8 variant for VFCVT_F_X.
19367 assert(Log2SEW >= 4);
19368 unsigned CVTFOpc =
19369 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
19370 ->MaskedPseudo;
19371
19372 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
19373 .add(MI.getOperand(0))
19374 .add(MI.getOperand(1))
19375 .addReg(Tmp)
19376 .add(MI.getOperand(3))
19377 .add(MachineOperand::CreateImm(7)) // frm = DYN
19378 .add(MI.getOperand(4))
19379 .add(MI.getOperand(5))
19380 .add(MI.getOperand(6))
19381 .add(MachineOperand::CreateReg(RISCV::FRM,
19382 /*IsDef*/ false,
19383 /*IsImp*/ true));
19384
19385 // Restore FFLAGS.
19386 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19387 .addReg(SavedFFLAGS, RegState::Kill);
19388
19389 // Erase the pseudoinstruction.
19390 MI.eraseFromParent();
19391 return BB;
19392}
19393
19394 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
19395 const RISCVSubtarget &Subtarget) {
19396 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
19397 const TargetRegisterClass *RC;
19398 switch (MI.getOpcode()) {
19399 default:
19400 llvm_unreachable("Unexpected opcode");
19401 case RISCV::PseudoFROUND_H:
19402 CmpOpc = RISCV::FLT_H;
19403 F2IOpc = RISCV::FCVT_W_H;
19404 I2FOpc = RISCV::FCVT_H_W;
19405 FSGNJOpc = RISCV::FSGNJ_H;
19406 FSGNJXOpc = RISCV::FSGNJX_H;
19407 RC = &RISCV::FPR16RegClass;
19408 break;
19409 case RISCV::PseudoFROUND_H_INX:
19410 CmpOpc = RISCV::FLT_H_INX;
19411 F2IOpc = RISCV::FCVT_W_H_INX;
19412 I2FOpc = RISCV::FCVT_H_W_INX;
19413 FSGNJOpc = RISCV::FSGNJ_H_INX;
19414 FSGNJXOpc = RISCV::FSGNJX_H_INX;
19415 RC = &RISCV::GPRF16RegClass;
19416 break;
19417 case RISCV::PseudoFROUND_S:
19418 CmpOpc = RISCV::FLT_S;
19419 F2IOpc = RISCV::FCVT_W_S;
19420 I2FOpc = RISCV::FCVT_S_W;
19421 FSGNJOpc = RISCV::FSGNJ_S;
19422 FSGNJXOpc = RISCV::FSGNJX_S;
19423 RC = &RISCV::FPR32RegClass;
19424 break;
19425 case RISCV::PseudoFROUND_S_INX:
19426 CmpOpc = RISCV::FLT_S_INX;
19427 F2IOpc = RISCV::FCVT_W_S_INX;
19428 I2FOpc = RISCV::FCVT_S_W_INX;
19429 FSGNJOpc = RISCV::FSGNJ_S_INX;
19430 FSGNJXOpc = RISCV::FSGNJX_S_INX;
19431 RC = &RISCV::GPRF32RegClass;
19432 break;
19433 case RISCV::PseudoFROUND_D:
19434 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19435 CmpOpc = RISCV::FLT_D;
19436 F2IOpc = RISCV::FCVT_L_D;
19437 I2FOpc = RISCV::FCVT_D_L;
19438 FSGNJOpc = RISCV::FSGNJ_D;
19439 FSGNJXOpc = RISCV::FSGNJX_D;
19440 RC = &RISCV::FPR64RegClass;
19441 break;
19442 case RISCV::PseudoFROUND_D_INX:
19443 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19444 CmpOpc = RISCV::FLT_D_INX;
19445 F2IOpc = RISCV::FCVT_L_D_INX;
19446 I2FOpc = RISCV::FCVT_D_L_INX;
19447 FSGNJOpc = RISCV::FSGNJ_D_INX;
19448 FSGNJXOpc = RISCV::FSGNJX_D_INX;
19449 RC = &RISCV::GPRRegClass;
19450 break;
19451 }
19452
19453 const BasicBlock *BB = MBB->getBasicBlock();
19454 DebugLoc DL = MI.getDebugLoc();
19455 MachineFunction::iterator I = ++MBB->getIterator();
19456
19457 MachineFunction *F = MBB->getParent();
19458 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
19459 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
19460
19461 F->insert(I, CvtMBB);
19462 F->insert(I, DoneMBB);
19463 // Move all instructions after the sequence to DoneMBB.
19464 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
19465 MBB->end());
19466 // Update machine-CFG edges by transferring all successors of the current
19467 // block to the new block, which will contain the PHI node for the result.
19468 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
19469 // Set the successors for MBB.
19470 MBB->addSuccessor(CvtMBB);
19471 MBB->addSuccessor(DoneMBB);
19472
19473 Register DstReg = MI.getOperand(0).getReg();
19474 Register SrcReg = MI.getOperand(1).getReg();
19475 Register MaxReg = MI.getOperand(2).getReg();
19476 int64_t FRM = MI.getOperand(3).getImm();
19477
19478 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19479 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
19480
19481 Register FabsReg = MRI.createVirtualRegister(RC);
19482 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
19483
19484 // Compare the FP value to the max value.
19485 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19486 auto MIB =
19487 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
19488 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19489 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19490
19491 // Insert branch.
19492 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
19493 .addReg(CmpReg)
19494 .addReg(RISCV::X0)
19495 .addMBB(DoneMBB);
19496
19497 CvtMBB->addSuccessor(DoneMBB);
19498
19499 // Convert to integer.
19500 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19501 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
19502 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19503 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19504
19505 // Convert back to FP.
19506 Register I2FReg = MRI.createVirtualRegister(RC);
19507 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
19508 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
19509 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
19510
19511 // Restore the sign bit.
19512 Register CvtReg = MRI.createVirtualRegister(RC);
19513 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
19514
19515 // Merge the results.
19516 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
19517 .addReg(SrcReg)
19518 .addMBB(MBB)
19519 .addReg(CvtReg)
19520 .addMBB(CvtMBB);
19521
19522 MI.eraseFromParent();
19523 return DoneMBB;
19524}
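// Roughly, PseudoFROUND_S becomes (register names illustrative):
//   fsgnjx.s ft0, fa0, fa0    # ft0 = |src|
//   flt.s    t0, ft0, fa1    # is |src| below the "no fraction possible" bound?
//   beq      t0, zero, done  # no: src is already integral (or NaN), keep it
//   fcvt.w.s t1, fa0, rm     # round to integer with the requested mode
//   fcvt.s.w ft1, t1, rm     # convert back to FP
//   fsgnj.s  fa0, ft1, fa0   # reapply the original sign
// done: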
19525
19526 MachineBasicBlock *
19527 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
19528 MachineBasicBlock *BB) const {
19529 switch (MI.getOpcode()) {
19530 default:
19531 llvm_unreachable("Unexpected instr type to insert");
19532 case RISCV::ReadCounterWide:
19533 assert(!Subtarget.is64Bit() &&
19534 "ReadCounterWide is only to be used on riscv32");
19535 return emitReadCounterWidePseudo(MI, BB);
19536 case RISCV::Select_GPR_Using_CC_GPR:
19537 case RISCV::Select_GPR_Using_CC_Imm:
19538 case RISCV::Select_FPR16_Using_CC_GPR:
19539 case RISCV::Select_FPR16INX_Using_CC_GPR:
19540 case RISCV::Select_FPR32_Using_CC_GPR:
19541 case RISCV::Select_FPR32INX_Using_CC_GPR:
19542 case RISCV::Select_FPR64_Using_CC_GPR:
19543 case RISCV::Select_FPR64INX_Using_CC_GPR:
19544 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19545 return emitSelectPseudo(MI, BB, Subtarget);
19546 case RISCV::BuildPairF64Pseudo:
19547 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
19548 case RISCV::SplitF64Pseudo:
19549 return emitSplitF64Pseudo(MI, BB, Subtarget);
19550 case RISCV::PseudoQuietFLE_H:
19551 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
19552 case RISCV::PseudoQuietFLE_H_INX:
19553 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
19554 case RISCV::PseudoQuietFLT_H:
19555 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
19556 case RISCV::PseudoQuietFLT_H_INX:
19557 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
19558 case RISCV::PseudoQuietFLE_S:
19559 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
19560 case RISCV::PseudoQuietFLE_S_INX:
19561 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
19562 case RISCV::PseudoQuietFLT_S:
19563 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
19564 case RISCV::PseudoQuietFLT_S_INX:
19565 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
19566 case RISCV::PseudoQuietFLE_D:
19567 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
19568 case RISCV::PseudoQuietFLE_D_INX:
19569 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
19570 case RISCV::PseudoQuietFLE_D_IN32X:
19571 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
19572 Subtarget);
19573 case RISCV::PseudoQuietFLT_D:
19574 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
19575 case RISCV::PseudoQuietFLT_D_INX:
19576 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
19577 case RISCV::PseudoQuietFLT_D_IN32X:
19578 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
19579 Subtarget);
19580
19581 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
19582 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
19583 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
19584 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
19585 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
19586 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
19587 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
19588 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
19589 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
19590 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
19591 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
19592 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
19593 case RISCV::PseudoFROUND_H:
19594 case RISCV::PseudoFROUND_H_INX:
19595 case RISCV::PseudoFROUND_S:
19596 case RISCV::PseudoFROUND_S_INX:
19597 case RISCV::PseudoFROUND_D:
19598 case RISCV::PseudoFROUND_D_INX:
19599 case RISCV::PseudoFROUND_D_IN32X:
19600 return emitFROUND(MI, BB, Subtarget);
19601 case TargetOpcode::STATEPOINT:
19602 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
19603 // while the jal call instruction (to which the statepoint is lowered at the
19604 // end) has an implicit def. This def is early-clobber as it is set at the
19605 // moment of the call, earlier than any use is read.
19606 // Add this implicit dead def here as a workaround.
19607 MI.addOperand(*MI.getMF(),
19608 MachineOperand::CreateReg(
19609 RISCV::X1, /*isDef*/ true,
19610 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
19611 /*isUndef*/ false, /*isEarlyClobber*/ true));
19612 [[fallthrough]];
19613 case TargetOpcode::STACKMAP:
19614 case TargetOpcode::PATCHPOINT:
19615 if (!Subtarget.is64Bit())
19616 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
19617 "supported on 64-bit targets");
19618 return emitPatchPoint(MI, BB);
19619 }
19620}
19621
19622 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
19623 SDNode *Node) const {
19624 // Add FRM dependency to any instructions with dynamic rounding mode.
19625 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
19626 if (Idx < 0) {
19627 // Vector pseudos have FRM index indicated by TSFlags.
19628 Idx = RISCVII::getFRMOpNum(MI.getDesc());
19629 if (Idx < 0)
19630 return;
19631 }
19632 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
19633 return;
19634 // If the instruction already reads FRM, don't add another read.
19635 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
19636 return;
19637 MI.addOperand(
19638 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
19639}
19640
19641void RISCVTargetLowering::analyzeInputArgs(
19642 MachineFunction &MF, CCState &CCInfo,
19643 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
19644 RISCVCCAssignFn Fn) const {
19645 unsigned NumArgs = Ins.size();
19646 FunctionType *FType = MF.getFunction().getFunctionType();
19647
19648 for (unsigned i = 0; i != NumArgs; ++i) {
19649 MVT ArgVT = Ins[i].VT;
19650 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19651
19652 Type *ArgTy = nullptr;
19653 if (IsRet)
19654 ArgTy = FType->getReturnType();
19655 else if (Ins[i].isOrigArg())
19656 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19657
19658 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19659 /*IsFixed=*/true, IsRet, ArgTy)) {
19660 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
19661 << ArgVT << '\n');
19662 llvm_unreachable(nullptr);
19663 }
19664 }
19665}
19666
19667void RISCVTargetLowering::analyzeOutputArgs(
19668 MachineFunction &MF, CCState &CCInfo,
19669 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19670 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19671 unsigned NumArgs = Outs.size();
19672
19673 for (unsigned i = 0; i != NumArgs; i++) {
19674 MVT ArgVT = Outs[i].VT;
19675 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19676 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19677
19678 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19679 Outs[i].IsFixed, IsRet, OrigTy)) {
19680 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19681 << ArgVT << "\n");
19682 llvm_unreachable(nullptr);
19683 }
19684 }
19685}
19686
19687// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19688// values.
19689 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
19690 const CCValAssign &VA, const SDLoc &DL,
19691 const RISCVSubtarget &Subtarget) {
19692 if (VA.needsCustom()) {
19693 if (VA.getLocVT().isInteger() &&
19694 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19695 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
19696 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
19697 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
19698 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
19699 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
19700 llvm_unreachable("Unexpected Custom handling.");
19701 }
19702
19703 switch (VA.getLocInfo()) {
19704 default:
19705 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19706 case CCValAssign::Full:
19707 break;
19708 case CCValAssign::BCvt:
19709 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
19710 break;
19711 }
19712 return Val;
19713}
19714
19715// The caller is responsible for loading the full value if the argument is
19716// passed with CCValAssign::Indirect.
19717 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
19718 const CCValAssign &VA, const SDLoc &DL,
19719 const ISD::InputArg &In,
19720 const RISCVTargetLowering &TLI) {
19721 MachineFunction &MF = DAG.getMachineFunction();
19722 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19723 EVT LocVT = VA.getLocVT();
19724 SDValue Val;
19725 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
19726 Register VReg = RegInfo.createVirtualRegister(RC);
19727 RegInfo.addLiveIn(VA.getLocReg(), VReg);
19728 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
19729
19730 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
19731 if (In.isOrigArg()) {
19732 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
19733 if (OrigArg->getType()->isIntegerTy()) {
19734 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
19735 // An input zero extended from i31 can also be considered sign extended.
19736 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
19737 (BitWidth < 32 && In.Flags.isZExt())) {
19738 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19739 RVFI->addSExt32Register(VReg);
19740 }
19741 }
19742 }
19743
19744 if (VA.getLocInfo() == CCValAssign::Indirect)
19745 return Val;
19746
19747 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
19748}
19749
19750 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
19751 const CCValAssign &VA, const SDLoc &DL,
19752 const RISCVSubtarget &Subtarget) {
19753 EVT LocVT = VA.getLocVT();
19754
19755 if (VA.needsCustom()) {
19756 if (LocVT.isInteger() &&
19757 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19758 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
19759 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
19760 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
19761 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
19762 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
19763 llvm_unreachable("Unexpected Custom handling.");
19764 }
19765
19766 switch (VA.getLocInfo()) {
19767 default:
19768 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19769 case CCValAssign::Full:
19770 break;
19771 case CCValAssign::BCvt:
19772 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
19773 break;
19774 }
19775 return Val;
19776}
19777
19778// The caller is responsible for loading the full value if the argument is
19779// passed with CCValAssign::Indirect.
19780 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
19781 const CCValAssign &VA, const SDLoc &DL) {
19782 MachineFunction &MF = DAG.getMachineFunction();
19783 MachineFrameInfo &MFI = MF.getFrameInfo();
19784 EVT LocVT = VA.getLocVT();
19785 EVT ValVT = VA.getValVT();
19786 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
19787 if (VA.getLocInfo() == CCValAssign::Indirect) {
19788 // When the value is a scalable vector, we save the pointer which points to
19789 // the scalable vector value in the stack. The ValVT will be the pointer
19790 // type, instead of the scalable vector type.
19791 ValVT = LocVT;
19792 }
19793 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
19794 /*IsImmutable=*/true);
19795 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19796 SDValue Val;
19797
19799 switch (VA.getLocInfo()) {
19800 default:
19801 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19802 case CCValAssign::Full:
19804 case CCValAssign::BCvt:
19805 break;
19806 }
19807 Val = DAG.getExtLoad(
19808 ExtType, DL, LocVT, Chain, FIN,
19809 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
19810 return Val;
19811}
19812
19813 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
19814 const CCValAssign &VA,
19815 const CCValAssign &HiVA,
19816 const SDLoc &DL) {
19817 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
19818 "Unexpected VA");
19819 MachineFunction &MF = DAG.getMachineFunction();
19820 MachineFrameInfo &MFI = MF.getFrameInfo();
19821 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19822
19823 assert(VA.isRegLoc() && "Expected register VA assignment");
19824
19825 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19826 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
19827 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
19828 SDValue Hi;
19829 if (HiVA.isMemLoc()) {
19830 // Second half of f64 is passed on the stack.
19831 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
19832 /*IsImmutable=*/true);
19833 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
19834 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
19835 MachinePointerInfo::getFixedStack(MF, FI)));
19836 } else {
19837 // Second half of f64 is passed in another GPR.
19838 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19839 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
19840 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
19841 }
19842 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
19843}
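// For example, with the soft-float ilp32 ABI an f64 argument whose low half
// was assigned to a GPR and whose high half spilled to the stack is rebuilt
// here as
//   BuildPairF64 (CopyFromReg lo-gpr), (load from the fixed stack object)
// so later lowering can treat it as a single f64 value again.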
19844
19845// Transform physical registers into virtual registers.
19846 SDValue RISCVTargetLowering::LowerFormalArguments(
19847 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
19848 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
19849 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
19850
19851 MachineFunction &MF = DAG.getMachineFunction();
19852
19853 switch (CallConv) {
19854 default:
19855 report_fatal_error("Unsupported calling convention");
19856 case CallingConv::C:
19857 case CallingConv::Fast:
19858 case CallingConv::SPIR_KERNEL:
19859 case CallingConv::GRAAL:
19860 case CallingConv::RISCV_VectorCall:
19861 break;
19862 case CallingConv::GHC:
19863 if (Subtarget.hasStdExtE())
19864 report_fatal_error("GHC calling convention is not supported on RVE!");
19865 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
19866 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
19867 "(Zdinx/D) instruction set extensions");
19868 }
19869
19870 const Function &Func = MF.getFunction();
19871 if (Func.hasFnAttribute("interrupt")) {
19872 if (!Func.arg_empty())
19874 "Functions with the interrupt attribute cannot have arguments!");
19875
19876 StringRef Kind =
19877 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
19878
19879 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
19881 "Function interrupt attribute argument not supported!");
19882 }
19883
19884 EVT PtrVT = getPointerTy(DAG.getDataLayout());
19885 MVT XLenVT = Subtarget.getXLenVT();
19886 unsigned XLenInBytes = Subtarget.getXLen() / 8;
19887 // Used with varargs to accumulate store chains.
19888 std::vector<SDValue> OutChains;
19889
19890 // Assign locations to all of the incoming arguments.
19891 SmallVector<CCValAssign, 16> ArgLocs;
19892 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
19893
19894 if (CallConv == CallingConv::GHC)
19895 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
19896 else
19897 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
19898 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
19899 : CC_RISCV);
19900
19901 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
19902 CCValAssign &VA = ArgLocs[i];
19903 SDValue ArgValue;
19904 // Passing f64 on RV32D with a soft float ABI must be handled as a special
19905 // case.
19906 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
19907 assert(VA.needsCustom());
19908 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
19909 } else if (VA.isRegLoc())
19910 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
19911 else
19912 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
19913
19914 if (VA.getLocInfo() == CCValAssign::Indirect) {
19915 // If the original argument was split and passed by reference (e.g. i128
19916 // on RV32), we need to load all parts of it here (using the same
19917 // address). Vectors may be partly split to registers and partly to the
19918 // stack, in which case the base address is partly offset and subsequent
19919 // stores are relative to that.
19920 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
19921 MachinePointerInfo()));
19922 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
19923 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
19924 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
19925 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
19926 CCValAssign &PartVA = ArgLocs[i + 1];
19927 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
19928 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
19929 if (PartVA.getValVT().isScalableVector())
19930 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
19931 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
19932 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
19933 MachinePointerInfo()));
19934 ++i;
19935 ++InsIdx;
19936 }
19937 continue;
19938 }
19939 InVals.push_back(ArgValue);
19940 }
19941
19942 if (any_of(ArgLocs,
19943 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
19944 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
19945
19946 if (IsVarArg) {
19947 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
19948 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
19949 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
19950 MachineFrameInfo &MFI = MF.getFrameInfo();
19951 MachineRegisterInfo &RegInfo = MF.getRegInfo();
19952 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
19953
19954 // Size of the vararg save area. For now, the varargs save area is either
19955 // zero or large enough to hold a0-a7.
19956 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
19957 int FI;
19958
19959 // If all registers are allocated, then all varargs must be passed on the
19960 // stack and we don't need to save any argregs.
19961 if (VarArgsSaveSize == 0) {
19962 int VaArgOffset = CCInfo.getStackSize();
19963 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
19964 } else {
19965 int VaArgOffset = -VarArgsSaveSize;
19966 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
19967
19968 // If saving an odd number of registers then create an extra stack slot to
19969 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
19970 // offsets to even-numbered registers remain 2*XLEN-aligned.
19971 if (Idx % 2) {
19972 MFI.CreateFixedObject(
19973 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
19974 VarArgsSaveSize += XLenInBytes;
19975 }
19976
19977 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
19978
19979 // Copy the integer registers that may have been used for passing varargs
19980 // to the vararg save area.
19981 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
19982 const Register Reg = RegInfo.createVirtualRegister(RC);
19983 RegInfo.addLiveIn(ArgRegs[I], Reg);
19984 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
19985 SDValue Store = DAG.getStore(
19986 Chain, DL, ArgValue, FIN,
19987 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
19988 OutChains.push_back(Store);
19989 FIN =
19990 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
19991 }
19992 }
19993
19994 // Record the frame index of the first variable argument
19995 // which is a value necessary to VASTART.
19996 RVFI->setVarArgsFrameIndex(FI);
19997 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
19998 }
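// For the vararg handling above, as an example: on RV32 with the standard
// eight argument GPRs and three of them used by fixed arguments, a3-a7 are
// spilled to a 20-byte save area; since an odd number of registers was used,
// one extra 4-byte slot is created so the even-numbered registers stay
// 2*XLEN-aligned, giving VarArgsSaveSize == 24.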
19999
20000 // All stores are grouped in one node to allow the matching between
20001 // the size of Ins and InVals. This only happens for vararg functions.
20002 if (!OutChains.empty()) {
20003 OutChains.push_back(Chain);
20004 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
20005 }
20006
20007 return Chain;
20008}
20009
20010/// isEligibleForTailCallOptimization - Check whether the call is eligible
20011/// for tail call optimization.
20012/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
20013bool RISCVTargetLowering::isEligibleForTailCallOptimization(
20014 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
20015 const SmallVector<CCValAssign, 16> &ArgLocs) const {
20016
20017 auto CalleeCC = CLI.CallConv;
20018 auto &Outs = CLI.Outs;
20019 auto &Caller = MF.getFunction();
20020 auto CallerCC = Caller.getCallingConv();
20021
20022 // Exception-handling functions need a special set of instructions to
20023 // indicate a return to the hardware. Tail-calling another function would
20024 // probably break this.
20025 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
20026 // should be expanded as new function attributes are introduced.
20027 if (Caller.hasFnAttribute("interrupt"))
20028 return false;
20029
20030 // Do not tail call opt if the stack is used to pass parameters.
20031 if (CCInfo.getStackSize() != 0)
20032 return false;
20033
20034 // Do not tail call opt if any parameters need to be passed indirectly.
20035 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
20036 // passed indirectly. So the address of the value will be passed in a
20037 // register, or if not available, then the address is put on the stack. In
20038 // order to pass indirectly, space on the stack often needs to be allocated
20039 // in order to store the value. In this case the CCInfo.getStackSize()
20040 // != 0 check is not enough; we also need to check whether any of the
20041 // CCValAssign locations in ArgLocs are passed CCValAssign::Indirect.
20042 for (auto &VA : ArgLocs)
20043 if (VA.getLocInfo() == CCValAssign::Indirect)
20044 return false;
20045
20046 // Do not tail call opt if either caller or callee uses struct return
20047 // semantics.
20048 auto IsCallerStructRet = Caller.hasStructRetAttr();
20049 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
20050 if (IsCallerStructRet || IsCalleeStructRet)
20051 return false;
20052
20053 // The callee has to preserve all registers the caller needs to preserve.
20054 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
20055 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
20056 if (CalleeCC != CallerCC) {
20057 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
20058 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20059 return false;
20060 }
20061
20062 // Byval parameters hand the function a pointer directly into the stack area
20063 // we want to reuse during a tail call. Working around this *is* possible
20064 // but less efficient and uglier in LowerCall.
20065 for (auto &Arg : Outs)
20066 if (Arg.Flags.isByVal())
20067 return false;
20068
20069 return true;
20070}
20071
20072 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
20073 return DAG.getDataLayout().getPrefTypeAlign(
20074 VT.getTypeForEVT(*DAG.getContext()));
20075}
20076
20077// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
20078// and output parameter nodes.
20079 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
20080 SmallVectorImpl<SDValue> &InVals) const {
20081 SelectionDAG &DAG = CLI.DAG;
20082 SDLoc &DL = CLI.DL;
20083 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
20084 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
20085 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
20086 SDValue Chain = CLI.Chain;
20087 SDValue Callee = CLI.Callee;
20088 bool &IsTailCall = CLI.IsTailCall;
20089 CallingConv::ID CallConv = CLI.CallConv;
20090 bool IsVarArg = CLI.IsVarArg;
20091 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20092 MVT XLenVT = Subtarget.getXLenVT();
20093
20094 MachineFunction &MF = DAG.getMachineFunction();
20095
20096 SmallVector<CCValAssign, 16> ArgLocs;
20097 // Analyze the operands of the call, assigning locations to each operand.
20098 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20099
20100 if (CallConv == CallingConv::GHC) {
20101 if (Subtarget.hasStdExtE())
20102 report_fatal_error("GHC calling convention is not supported on RVE!");
20103 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
20104 } else
20105 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
20106 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
20107 : CC_RISCV);
20108
20109 // Check if it's really possible to do a tail call.
20110 if (IsTailCall)
20111 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
20112
20113 if (IsTailCall)
20114 ++NumTailCalls;
20115 else if (CLI.CB && CLI.CB->isMustTailCall())
20116 report_fatal_error("failed to perform tail call elimination on a call "
20117 "site marked musttail");
20118
20119 // Get a count of how many bytes are to be pushed on the stack.
20120 unsigned NumBytes = ArgCCInfo.getStackSize();
20121
20122 // Create local copies for byval args
20123 SmallVector<SDValue, 8> ByValArgs;
20124 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20125 ISD::ArgFlagsTy Flags = Outs[i].Flags;
20126 if (!Flags.isByVal())
20127 continue;
20128
20129 SDValue Arg = OutVals[i];
20130 unsigned Size = Flags.getByValSize();
20131 Align Alignment = Flags.getNonZeroByValAlign();
20132
20133 int FI =
20134 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
20135 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
20136 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
20137
20138 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
20139 /*IsVolatile=*/false,
20140 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
20141 MachinePointerInfo(), MachinePointerInfo());
20142 ByValArgs.push_back(FIPtr);
20143 }
20144
20145 if (!IsTailCall)
20146 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
20147
20148 // Copy argument values to their designated locations.
20149 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
20150 SmallVector<SDValue, 8> MemOpChains;
20151 SDValue StackPtr;
20152 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
20153 ++i, ++OutIdx) {
20154 CCValAssign &VA = ArgLocs[i];
20155 SDValue ArgValue = OutVals[OutIdx];
20156 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
20157
20158 // Handle passing f64 on RV32D with a soft float ABI as a special case.
20159 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20160 assert(VA.isRegLoc() && "Expected register VA assignment");
20161 assert(VA.needsCustom());
20162 SDValue SplitF64 = DAG.getNode(
20163 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
20164 SDValue Lo = SplitF64.getValue(0);
20165 SDValue Hi = SplitF64.getValue(1);
20166
20167 Register RegLo = VA.getLocReg();
20168 RegsToPass.push_back(std::make_pair(RegLo, Lo));
20169
20170 // Get the CCValAssign for the Hi part.
20171 CCValAssign &HiVA = ArgLocs[++i];
20172
20173 if (HiVA.isMemLoc()) {
20174 // Second half of f64 is passed on the stack.
20175 if (!StackPtr.getNode())
20176 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20177 SDValue Address =
20178 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20179 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
20180 // Emit the store.
20181 MemOpChains.push_back(DAG.getStore(
20182 Chain, DL, Hi, Address,
20183 MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
20184 } else {
20185 // Second half of f64 is passed in another GPR.
20186 Register RegHigh = HiVA.getLocReg();
20187 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
20188 }
20189 continue;
20190 }
20191
20192 // Promote the value if needed.
20193 // For now, only handle fully promoted and indirect arguments.
20194 if (VA.getLocInfo() == CCValAssign::Indirect) {
20195 // Store the argument in a stack slot and pass its address.
20196 Align StackAlign =
20197 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20198 getPrefTypeAlign(ArgValue.getValueType(), DAG));
20199 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20200 // If the original argument was split (e.g. i128), we need
20201 // to store the required parts of it here (and pass just one address).
20202 // Vectors may be partly split to registers and partly to the stack, in
20203 // which case the base address is partly offset and subsequent stores are
20204 // relative to that.
20205 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
20206 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
20207 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20208 // Calculate the total size to store. We don't have access to what we're
20209 // actually storing other than performing the loop and collecting the
20210 // info.
20212 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
20213 SDValue PartValue = OutVals[OutIdx + 1];
20214 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
20215 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20216 EVT PartVT = PartValue.getValueType();
20217 if (PartVT.isScalableVector())
20218 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20219 StoredSize += PartVT.getStoreSize();
20220 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
20221 Parts.push_back(std::make_pair(PartValue, Offset));
20222 ++i;
20223 ++OutIdx;
20224 }
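// All collected parts of the split argument are stored into a single stack
// temporary below, and only that temporary's address is passed on.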
20225 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
20226 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
20227 MemOpChains.push_back(
20228 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
20229 MachinePointerInfo::getFixedStack(MF, FI)));
20230 for (const auto &Part : Parts) {
20231 SDValue PartValue = Part.first;
20232 SDValue PartOffset = Part.second;
20233 SDValue Address =
20234 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
20235 MemOpChains.push_back(
20236 DAG.getStore(Chain, DL, PartValue, Address,
20237 MachinePointerInfo::getFixedStack(MF, FI)));
20238 }
20239 ArgValue = SpillSlot;
20240 } else {
20241 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
20242 }
20243
20244 // Use local copy if it is a byval arg.
20245 if (Flags.isByVal())
20246 ArgValue = ByValArgs[j++];
20247
20248 if (VA.isRegLoc()) {
20249 // Queue up the argument copies and emit them at the end.
20250 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
20251 } else {
20252 assert(VA.isMemLoc() && "Argument not register or memory");
20253 assert(!IsTailCall && "Tail call not allowed if stack is used "
20254 "for passing parameters");
20255
20256 // Work out the address of the stack slot.
20257 if (!StackPtr.getNode())
20258 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20259 SDValue Address =
20260 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20261 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
20262
20263 // Emit the store.
20264 MemOpChains.push_back(
20265 DAG.getStore(Chain, DL, ArgValue, Address,
20266 MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
20267 }
20268 }
20269
20270 // Join the stores, which are independent of one another.
20271 if (!MemOpChains.empty())
20272 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
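// The TokenFactor leaves the argument stores unordered with respect to each
// other while still making every one of them a predecessor of the call.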
20273
20274 SDValue Glue;
20275
20276 // Build a sequence of copy-to-reg nodes, chained and glued together.
20277 for (auto &Reg : RegsToPass) {
20278 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
20279 Glue = Chain.getValue(1);
20280 }
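// The glue chain keeps these CopyToReg nodes pinned immediately before the
// call during scheduling, so the argument registers stay live into it.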
20281
20282 // Validate that none of the argument registers have been marked as
20283 // reserved; if so, report an error. Do the same for the return address if
20284 // this is not a tail call.
20285 validateCCReservedRegs(RegsToPass, MF);
20286 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
20287 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20288 MF.getFunction(),
20289 "Return address register required, but has been reserved."});
20290
20291 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20292 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20293 // split it and then direct call can be matched by PseudoCALL.
20294 bool CalleeIsLargeExternalSymbol = false;
20295 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
20296 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
20297 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
20298 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20299 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
20300 CalleeIsLargeExternalSymbol = true;
20301 }
20302 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
20303 const GlobalValue *GV = S->getGlobal();
20304 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
20305 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20306 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
20307 }
20308
20309 // The first call operand is the chain and the second is the target address.
20310 SmallVector<SDValue, 8> Ops;
20311 Ops.push_back(Chain);
20312 Ops.push_back(Callee);
20313
20314 // Add argument registers to the end of the list so that they are
20315 // known live into the call.
20316 for (auto &Reg : RegsToPass)
20317 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
20318
20319 // Add a register mask operand representing the call-preserved registers.
20320 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
20321 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
20322 assert(Mask && "Missing call preserved mask for calling convention");
20323 Ops.push_back(DAG.getRegisterMask(Mask));
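// Any register not marked call-preserved by this mask is treated as
// clobbered by the call.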
20324
20325 // Glue the call to the argument copies, if any.
20326 if (Glue.getNode())
20327 Ops.push_back(Glue);
20328
20329 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
20330 "Unexpected CFI type for a direct call");
20331
20332 // Emit the call.
20333 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20334
20335 // Use a software-guarded branch for large code model non-indirect calls.
20336 // A tail call to an external symbol will have a null CLI.CB, so we need
20337 // another way to determine the callsite type.
20338 bool NeedSWGuarded = false;
20339 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
20340 Subtarget.hasStdExtZicfilp() &&
20341 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
20342 NeedSWGuarded = true;
20343
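// A tail call is emitted as a terminator (TAIL/SW_GUARDED_TAIL) and returns
// right away: no CALLSEQ_END and no copies of return values, since the
// callee's results go straight back to our caller.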
20344 if (IsTailCall) {
20345 MF.getFrameInfo().setHasTailCall();
20346 unsigned CallOpc =
20347 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
20348 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20349 if (CLI.CFIType)
20350 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20351 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
20352 return Ret;
20353 }
20354
20355 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
20356 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20357 if (CLI.CFIType)
20358 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20359 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
20360 Glue = Chain.getValue(1);
20361
20362 // Mark the end of the call, which is glued to the call itself.
20363 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
20364 Glue = Chain.getValue(1);
20365
20366 // Assign locations to each value returned by this call.
20367 SmallVector<CCValAssign, 16> RVLocs;
20368 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20369 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
20370
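// The CopyFromReg nodes below consume and produce glue, which keeps them
// pinned right after CALLSEQ_END so the returned register values are read
// before anything else can clobber them.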
20371 // Copy all of the result registers out of their specified physreg.
20372 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20373 auto &VA = RVLocs[i];
20374 // Copy the value out
20375 SDValue RetValue =
20376 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20377 // Glue the RetValue to the end of the call sequence
20378 Chain = RetValue.getValue(1);
20379 Glue = RetValue.getValue(2);
20380
20381 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20382 assert(VA.needsCustom());
20383 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20384 MVT::i32, Glue);
20385 Chain = RetValue2.getValue(1);
20386 Glue = RetValue2.getValue(2);
20387 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20388 RetValue2);
20389 } else
20390 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20391
20392 InVals.push_back(RetValue);
20393 }
20394
20395 return Chain;
20396}
20397
20398 bool RISCVTargetLowering::CanLowerReturn(
20399 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20400 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
20401 SmallVector<CCValAssign, 16> RVLocs;
20402 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20403
20404 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20405 MVT VT = Outs[i].VT;
20406 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20407 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
20408 /*IsFixed=*/true, /*IsRet=*/true, nullptr))
20409 return false;
20410 }
20411 return true;
20412}
20413
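// For example, an f64 returned from an RV32 function under a soft-float ABI
// takes the SplitF64 path below and normally comes back in a0/a1, mirroring
// the BuildPairF64 reassembly done on the caller side in LowerCall above.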
20414SDValue
20415 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
20416 bool IsVarArg,
20417 const SmallVectorImpl<ISD::OutputArg> &Outs,
20418 const SmallVectorImpl<SDValue> &OutVals,
20419 const SDLoc &DL, SelectionDAG &DAG) const {
20420 MachineFunction &MF = DAG.getMachineFunction();
20421 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20422
20423 // Stores the assignment of the return value to a location.
20424 SmallVector<CCValAssign, 16> RVLocs;
20425 
20426 // Info about the registers and stack slot.
20427 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20428 *DAG.getContext());
20429
20430 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20431 nullptr, CC_RISCV);
20432
20433 if (CallConv == CallingConv::GHC && !RVLocs.empty())
20434 report_fatal_error("GHC functions return void only");
20435
20436 SDValue Glue;
20437 SmallVector<SDValue, 4> RetOps(1, Chain);
20438
20439 // Copy the result values into the output registers.
20440 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20441 SDValue Val = OutVals[OutIdx];
20442 CCValAssign &VA = RVLocs[i];
20443 assert(VA.isRegLoc() && "Can only return in registers!");
20444
20445 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20446 // Handle returning f64 on RV32D with a soft float ABI.
20447 assert(VA.isRegLoc() && "Expected return via registers");
20448 assert(VA.needsCustom());
20449 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20450 DAG.getVTList(MVT::i32, MVT::i32), Val);
20451 SDValue Lo = SplitF64.getValue(0);
20452 SDValue Hi = SplitF64.getValue(1);
20453 Register RegLo = VA.getLocReg();
20454 Register RegHi = RVLocs[++i].getLocReg();
20455
20456 if (STI.isRegisterReservedByUser(RegLo) ||
20457 STI.isRegisterReservedByUser(RegHi))
20458 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20459 MF.getFunction(),
20460 "Return value register required, but has been reserved."});
20461
20462 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20463 Glue = Chain.getValue(1);
20464 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20465 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20466 Glue = Chain.getValue(1);
20467 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20468 } else {
20469 // Handle a 'normal' return.
20470 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20471 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20472
20473 if (STI.isRegisterReservedByUser(VA.getLocReg()))
20474 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20475 MF.getFunction(),
20476 "Return value register required, but has been reserved."});
20477
20478 // Guarantee that all emitted copies are stuck together.
20479 Glue = Chain.getValue(1);
20480 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20481 }
20482 }
20483
20484 RetOps[0] = Chain; // Update chain.
20485
20486 // Add the glue node if we have it.
20487 if (Glue.getNode()) {
20488 RetOps.push_back(Glue);
20489 }
20490
20491 if (any_of(RVLocs,
20492 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20493 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20494
20495 unsigned RetOpc = RISCVISD::RET_GLUE;
20496 // Interrupt service routines use different return instructions.
20497 const Function &Func = DAG.getMachineFunction().getFunction();
20498 if (Func.hasFnAttribute("interrupt")) {
20499 if (!Func.getReturnType()->isVoidTy())
20501 "Functions with the interrupt attribute must have void return type!");
20502
20503 MachineFunction &MF = DAG.getMachineFunction();
20504 StringRef Kind =
20505 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20506
20507 if (Kind == "supervisor")
20508 RetOpc = RISCVISD::SRET_GLUE;
20509 else
20510 RetOpc = RISCVISD::MRET_GLUE;
20511 }
20512
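// RET_GLUE, SRET_GLUE and MRET_GLUE are matched to the ret, sret and mret
// instructions respectively during instruction selection.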
20513 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20514}
20515
20516void RISCVTargetLowering::validateCCReservedRegs(
20517 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20518 MachineFunction &MF) const {
20519 const Function &F = MF.getFunction();
20520 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20521
20522 if (llvm::any_of(Regs, [&STI](auto Reg) {
20523 return STI.isRegisterReservedByUser(Reg.first);
20524 }))
20525 F.getContext().diagnose(DiagnosticInfoUnsupported{
20526 F, "Argument register required, but has been reserved."});
20527}
20528
20529// Check if the result of the node is only used as a return value, as
20530// otherwise we can't perform a tail-call.
20531 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
20532 if (N->getNumValues() != 1)
20533 return false;
20534 if (!N->hasNUsesOfValue(1, 0))
20535 return false;
20536
20537 SDNode *Copy = *N->user_begin();
20538
20539 if (Copy->getOpcode() == ISD::BITCAST) {
20540 return isUsedByReturnOnly(Copy, Chain);
20541 }
20542
20543 // TODO: Handle additional opcodes in order to support tail-calling libcalls
20544 // with soft float ABIs.
20545 if (Copy->getOpcode() != ISD::CopyToReg) {
20546 return false;
20547 }
20548
20549 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20550 // isn't safe to perform a tail call.
20551 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20552 return false;
20553
20554 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20555 bool HasRet = false;
20556 for (SDNode *Node : Copy->users()) {
20557 if (Node->getOpcode() != RISCVISD::RET_GLUE)
20558 return false;
20559 HasRet = true;
20560 }
20561 if (!HasRet)
20562 return false;
20563
20564 Chain = Copy->getOperand(0);
20565 return true;
20566}
20567
20568 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20569 return CI->isTailCall();
20570}
20571
20572const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20573#define NODE_NAME_CASE(NODE) \
20574 case RISCVISD::NODE: \
20575 return "RISCVISD::" #NODE;
20576 // clang-format off
20577 switch ((RISCVISD::NodeType)Opcode) {
20578 case RISCVISD::FIRST_NUMBER:
20579 break;
20580 NODE_NAME_CASE(RET_GLUE)
20581 NODE_NAME_CASE(SRET_GLUE)
20582 NODE_NAME_CASE(MRET_GLUE)
20583 NODE_NAME_CASE(CALL)
20584 NODE_NAME_CASE(TAIL)
20585 NODE_NAME_CASE(SELECT_CC)
20586 NODE_NAME_CASE(BR_CC)
20587 NODE_NAME_CASE(BuildGPRPair)
20588 NODE_NAME_CASE(SplitGPRPair)
20589 NODE_NAME_CASE(BuildPairF64)
20590 NODE_NAME_CASE(SplitF64)
20591 NODE_NAME_CASE(ADD_LO)
20592 NODE_NAME_CASE(HI)
20593 NODE_NAME_CASE(LLA)
20594 NODE_NAME_CASE(ADD_TPREL)
20595 NODE_NAME_CASE(MULHSU)
20596 NODE_NAME_CASE(SHL_ADD)
20597 NODE_NAME_CASE(SLLW)
20598 NODE_NAME_CASE(SRAW)
20599 NODE_NAME_CASE(SRLW)
20600 NODE_NAME_CASE(DIVW)
20601 NODE_NAME_CASE(DIVUW)
20602 NODE_NAME_CASE(REMUW)
20603 NODE_NAME_CASE(ROLW)
20604 NODE_NAME_CASE(RORW)
20605 NODE_NAME_CASE(CLZW)
20606 NODE_NAME_CASE(CTZW)
20607 NODE_NAME_CASE(ABSW)
20608 NODE_NAME_CASE(FMV_H_X)
20609 NODE_NAME_CASE(FMV_X_ANYEXTH)
20610 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20611 NODE_NAME_CASE(FMV_W_X_RV64)
20612 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20613 NODE_NAME_CASE(FCVT_X)
20614 NODE_NAME_CASE(FCVT_XU)
20615 NODE_NAME_CASE(FCVT_W_RV64)
20616 NODE_NAME_CASE(FCVT_WU_RV64)
20617 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20618 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20619 NODE_NAME_CASE(FROUND)
20620 NODE_NAME_CASE(FCLASS)
20621 NODE_NAME_CASE(FSGNJX)
20622 NODE_NAME_CASE(FMAX)
20623 NODE_NAME_CASE(FMIN)
20624 NODE_NAME_CASE(FLI)
20625 NODE_NAME_CASE(READ_COUNTER_WIDE)
20626 NODE_NAME_CASE(BREV8)
20627 NODE_NAME_CASE(ORC_B)
20628 NODE_NAME_CASE(ZIP)
20629 NODE_NAME_CASE(UNZIP)
20630 NODE_NAME_CASE(CLMUL)
20631 NODE_NAME_CASE(CLMULH)
20632 NODE_NAME_CASE(CLMULR)
20633 NODE_NAME_CASE(MOPR)
20634 NODE_NAME_CASE(MOPRR)
20635 NODE_NAME_CASE(SHA256SIG0)
20636 NODE_NAME_CASE(SHA256SIG1)
20637 NODE_NAME_CASE(SHA256SUM0)
20638 NODE_NAME_CASE(SHA256SUM1)
20639 NODE_NAME_CASE(SM4KS)
20640 NODE_NAME_CASE(SM4ED)
20641 NODE_NAME_CASE(SM3P0)
20642 NODE_NAME_CASE(SM3P1)
20643 NODE_NAME_CASE(TH_LWD)
20644 NODE_NAME_CASE(TH_LWUD)
20645 NODE_NAME_CASE(TH_LDD)
20646 NODE_NAME_CASE(TH_SWD)
20647 NODE_NAME_CASE(TH_SDD)
20648 NODE_NAME_CASE(VMV_V_V_VL)
20649 NODE_NAME_CASE(VMV_V_X_VL)
20650 NODE_NAME_CASE(VFMV_V_F_VL)
20651 NODE_NAME_CASE(VMV_X_S)
20652 NODE_NAME_CASE(VMV_S_X_VL)
20653 NODE_NAME_CASE(VFMV_S_F_VL)
20654 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20655 NODE_NAME_CASE(READ_VLENB)
20656 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20657 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20658 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20659 NODE_NAME_CASE(VSLIDEUP_VL)
20660 NODE_NAME_CASE(VSLIDE1UP_VL)
20661 NODE_NAME_CASE(VSLIDEDOWN_VL)
20662 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20663 NODE_NAME_CASE(VFSLIDE1UP_VL)
20664 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20665 NODE_NAME_CASE(VID_VL)
20666 NODE_NAME_CASE(VFNCVT_ROD_VL)
20667 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20668 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20669 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20670 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20671 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20672 NODE_NAME_CASE(VECREDUCE_AND_VL)
20673 NODE_NAME_CASE(VECREDUCE_OR_VL)
20674 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20675 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20676 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20677 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20678 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20679 NODE_NAME_CASE(ADD_VL)
20680 NODE_NAME_CASE(AND_VL)
20681 NODE_NAME_CASE(MUL_VL)
20682 NODE_NAME_CASE(OR_VL)
20683 NODE_NAME_CASE(SDIV_VL)
20684 NODE_NAME_CASE(SHL_VL)
20685 NODE_NAME_CASE(SREM_VL)
20686 NODE_NAME_CASE(SRA_VL)
20687 NODE_NAME_CASE(SRL_VL)
20688 NODE_NAME_CASE(ROTL_VL)
20689 NODE_NAME_CASE(ROTR_VL)
20690 NODE_NAME_CASE(SUB_VL)
20691 NODE_NAME_CASE(UDIV_VL)
20692 NODE_NAME_CASE(UREM_VL)
20693 NODE_NAME_CASE(XOR_VL)
20694 NODE_NAME_CASE(AVGFLOORS_VL)
20695 NODE_NAME_CASE(AVGFLOORU_VL)
20696 NODE_NAME_CASE(AVGCEILS_VL)
20697 NODE_NAME_CASE(AVGCEILU_VL)
20698 NODE_NAME_CASE(SADDSAT_VL)
20699 NODE_NAME_CASE(UADDSAT_VL)
20700 NODE_NAME_CASE(SSUBSAT_VL)
20701 NODE_NAME_CASE(USUBSAT_VL)
20702 NODE_NAME_CASE(FADD_VL)
20703 NODE_NAME_CASE(FSUB_VL)
20704 NODE_NAME_CASE(FMUL_VL)
20705 NODE_NAME_CASE(FDIV_VL)
20706 NODE_NAME_CASE(FNEG_VL)
20707 NODE_NAME_CASE(FABS_VL)
20708 NODE_NAME_CASE(FSQRT_VL)
20709 NODE_NAME_CASE(FCLASS_VL)
20710 NODE_NAME_CASE(VFMADD_VL)
20711 NODE_NAME_CASE(VFNMADD_VL)
20712 NODE_NAME_CASE(VFMSUB_VL)
20713 NODE_NAME_CASE(VFNMSUB_VL)
20714 NODE_NAME_CASE(VFWMADD_VL)
20715 NODE_NAME_CASE(VFWNMADD_VL)
20716 NODE_NAME_CASE(VFWMSUB_VL)
20717 NODE_NAME_CASE(VFWNMSUB_VL)
20718 NODE_NAME_CASE(FCOPYSIGN_VL)
20719 NODE_NAME_CASE(SMIN_VL)
20720 NODE_NAME_CASE(SMAX_VL)
20721 NODE_NAME_CASE(UMIN_VL)
20722 NODE_NAME_CASE(UMAX_VL)
20723 NODE_NAME_CASE(BITREVERSE_VL)
20724 NODE_NAME_CASE(BSWAP_VL)
20725 NODE_NAME_CASE(CTLZ_VL)
20726 NODE_NAME_CASE(CTTZ_VL)
20727 NODE_NAME_CASE(CTPOP_VL)
20728 NODE_NAME_CASE(VFMIN_VL)
20729 NODE_NAME_CASE(VFMAX_VL)
20730 NODE_NAME_CASE(MULHS_VL)
20731 NODE_NAME_CASE(MULHU_VL)
20732 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20733 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20734 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20735 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20736 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20737 NODE_NAME_CASE(SINT_TO_FP_VL)
20738 NODE_NAME_CASE(UINT_TO_FP_VL)
20739 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20740 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20741 NODE_NAME_CASE(FP_EXTEND_VL)
20742 NODE_NAME_CASE(FP_ROUND_VL)
20743 NODE_NAME_CASE(STRICT_FADD_VL)
20744 NODE_NAME_CASE(STRICT_FSUB_VL)
20745 NODE_NAME_CASE(STRICT_FMUL_VL)
20746 NODE_NAME_CASE(STRICT_FDIV_VL)
20747 NODE_NAME_CASE(STRICT_FSQRT_VL)
20748 NODE_NAME_CASE(STRICT_VFMADD_VL)
20749 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20750 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20751 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20752 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20753 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20754 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20755 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20756 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20757 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20758 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20759 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20760 NODE_NAME_CASE(STRICT_FSETCC_VL)
20761 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20762 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20763 NODE_NAME_CASE(VWMUL_VL)
20764 NODE_NAME_CASE(VWMULU_VL)
20765 NODE_NAME_CASE(VWMULSU_VL)
20766 NODE_NAME_CASE(VWADD_VL)
20767 NODE_NAME_CASE(VWADDU_VL)
20768 NODE_NAME_CASE(VWSUB_VL)
20769 NODE_NAME_CASE(VWSUBU_VL)
20770 NODE_NAME_CASE(VWADD_W_VL)
20771 NODE_NAME_CASE(VWADDU_W_VL)
20772 NODE_NAME_CASE(VWSUB_W_VL)
20773 NODE_NAME_CASE(VWSUBU_W_VL)
20774 NODE_NAME_CASE(VWSLL_VL)
20775 NODE_NAME_CASE(VFWMUL_VL)
20776 NODE_NAME_CASE(VFWADD_VL)
20777 NODE_NAME_CASE(VFWSUB_VL)
20778 NODE_NAME_CASE(VFWADD_W_VL)
20779 NODE_NAME_CASE(VFWSUB_W_VL)
20780 NODE_NAME_CASE(VWMACC_VL)
20781 NODE_NAME_CASE(VWMACCU_VL)
20782 NODE_NAME_CASE(VWMACCSU_VL)
20783 NODE_NAME_CASE(SETCC_VL)
20784 NODE_NAME_CASE(VMERGE_VL)
20785 NODE_NAME_CASE(VMAND_VL)
20786 NODE_NAME_CASE(VMOR_VL)
20787 NODE_NAME_CASE(VMXOR_VL)
20788 NODE_NAME_CASE(VMCLR_VL)
20789 NODE_NAME_CASE(VMSET_VL)
20790 NODE_NAME_CASE(VRGATHER_VX_VL)
20791 NODE_NAME_CASE(VRGATHER_VV_VL)
20792 NODE_NAME_CASE(VRGATHEREI16_VV_VL)
20793 NODE_NAME_CASE(VSEXT_VL)
20794 NODE_NAME_CASE(VZEXT_VL)
20795 NODE_NAME_CASE(VCPOP_VL)
20796 NODE_NAME_CASE(VFIRST_VL)
20797 NODE_NAME_CASE(READ_CSR)
20798 NODE_NAME_CASE(WRITE_CSR)
20799 NODE_NAME_CASE(SWAP_CSR)
20800 NODE_NAME_CASE(CZERO_EQZ)
20801 NODE_NAME_CASE(CZERO_NEZ)
20802 NODE_NAME_CASE(SW_GUARDED_BRIND)
20803 NODE_NAME_CASE(SW_GUARDED_CALL)
20804 NODE_NAME_CASE(SW_GUARDED_TAIL)
20805 NODE_NAME_CASE(TUPLE_INSERT)
20806 NODE_NAME_CASE(TUPLE_EXTRACT)
20807 NODE_NAME_CASE(SF_VC_XV_SE)
20808 NODE_NAME_CASE(SF_VC_IV_SE)
20809 NODE_NAME_CASE(SF_VC_VV_SE)
20810 NODE_NAME_CASE(SF_VC_FV_SE)
20811 NODE_NAME_CASE(SF_VC_XVV_SE)
20812 NODE_NAME_CASE(SF_VC_IVV_SE)
20813 NODE_NAME_CASE(SF_VC_VVV_SE)
20814 NODE_NAME_CASE(SF_VC_FVV_SE)
20815 NODE_NAME_CASE(SF_VC_XVW_SE)
20816 NODE_NAME_CASE(SF_VC_IVW_SE)
20817 NODE_NAME_CASE(SF_VC_VVW_SE)
20818 NODE_NAME_CASE(SF_VC_FVW_SE)
20819 NODE_NAME_CASE(SF_VC_V_X_SE)
20820 NODE_NAME_CASE(SF_VC_V_I_SE)
20821 NODE_NAME_CASE(SF_VC_V_XV_SE)
20822 NODE_NAME_CASE(SF_VC_V_IV_SE)
20823 NODE_NAME_CASE(SF_VC_V_VV_SE)
20824 NODE_NAME_CASE(SF_VC_V_FV_SE)
20825 NODE_NAME_CASE(SF_VC_V_XVV_SE)
20826 NODE_NAME_CASE(SF_VC_V_IVV_SE)
20827 NODE_NAME_CASE(SF_VC_V_VVV_SE)
20828 NODE_NAME_CASE(SF_VC_V_FVV_SE)
20829 NODE_NAME_CASE(SF_VC_V_XVW_SE)
20830 NODE_NAME_CASE(SF_VC_V_IVW_SE)
20831 NODE_NAME_CASE(SF_VC_V_VVW_SE)
20832 NODE_NAME_CASE(SF_VC_V_FVW_SE)
20833 }
20834 // clang-format on
20835 return nullptr;
20836#undef NODE_NAME_CASE
20837}
20838
20839/// getConstraintType - Given a constraint letter, return the type of
20840/// constraint it is for this target.
20841 RISCVTargetLowering::ConstraintType
20842 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
20843 if (Constraint.size() == 1) {
20844 switch (Constraint[0]) {
20845 default:
20846 break;
20847 case 'f':
20848 case 'R':
20849 return C_RegisterClass;
20850 case 'I':
20851 case 'J':
20852 case 'K':
20853 return C_Immediate;
20854 case 'A':
20855 return C_Memory;
20856 case 's':
20857 case 'S': // A symbolic address
20858 return C_Other;
20859 }
20860 } else {
20861 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
20862 return C_RegisterClass;
20863 if (Constraint == "cr" || Constraint == "cf")
20864 return C_RegisterClass;
20865 }
20866 return TargetLowering::getConstraintType(Constraint);
20867}
20868
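// Illustrative uses (not taken from this file) of the constraints handled
// here and classified in getConstraintType above, at the C level:
//   asm ("fadd.s %0, %1, %2" : "=f"(d) : "f"(a), "f"(b)); // 'f': FP register
//   asm ("addi %0, %1, %2"   : "=r"(y) : "r"(x), "I"(16)); // 'I': 12-bit imm
// LLVM-style explicit-register constraints such as "{a0}" or "{fa0}" are
// resolved by the StringSwitch tables further down in this function.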
20869std::pair<unsigned, const TargetRegisterClass *>
20870 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
20871 StringRef Constraint,
20872 MVT VT) const {
20873 // First, see if this is a constraint that directly corresponds to a RISC-V
20874 // register class.
20875 if (Constraint.size() == 1) {
20876 switch (Constraint[0]) {
20877 case 'r':
20878 // TODO: Support fixed vectors up to XLen for P extension?
20879 if (VT.isVector())
20880 break;
20881 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20882 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
20883 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20884 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
20885 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20886 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20887 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20888 case 'f':
20889 if (VT == MVT::f16) {
20890 if (Subtarget.hasStdExtZfhmin())
20891 return std::make_pair(0U, &RISCV::FPR16RegClass);
20892 if (Subtarget.hasStdExtZhinxmin())
20893 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
20894 } else if (VT == MVT::f32) {
20895 if (Subtarget.hasStdExtF())
20896 return std::make_pair(0U, &RISCV::FPR32RegClass);
20897 if (Subtarget.hasStdExtZfinx())
20898 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
20899 } else if (VT == MVT::f64) {
20900 if (Subtarget.hasStdExtD())
20901 return std::make_pair(0U, &RISCV::FPR64RegClass);
20902 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20903 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20904 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
20905 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
20906 }
20907 break;
20908 case 'R':
20909 if (VT == MVT::f64 && !Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())
20910 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20911 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
20912 default:
20913 break;
20914 }
20915 } else if (Constraint == "vr") {
20916 for (const auto *RC :
20917 {&RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
20918 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN3M1RegClass,
20919 &RISCV::VRN4M1RegClass, &RISCV::VRN5M1RegClass,
20920 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass,
20921 &RISCV::VRN8M1RegClass, &RISCV::VRN2M2RegClass,
20922 &RISCV::VRN3M2RegClass, &RISCV::VRN4M2RegClass,
20923 &RISCV::VRN2M4RegClass}) {
20924 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20925 return std::make_pair(0U, RC);
20926 }
20927 } else if (Constraint == "vd") {
20928 for (const auto *RC :
20929 {&RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
20930 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
20931 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
20932 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
20933 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
20934 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
20935 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
20936 &RISCV::VRN2M4NoV0RegClass}) {
20937 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
20938 return std::make_pair(0U, RC);
20939 }
20940 } else if (Constraint == "vm") {
20941 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
20942 return std::make_pair(0U, &RISCV::VMV0RegClass);
20943 } else if (Constraint == "cr") {
20944 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
20945 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
20946 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
20947 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
20948 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20949 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
20950 if (!VT.isVector())
20951 return std::make_pair(0U, &RISCV::GPRCRegClass);
20952 } else if (Constraint == "cf") {
20953 if (VT == MVT::f16) {
20954 if (Subtarget.hasStdExtZfhmin())
20955 return std::make_pair(0U, &RISCV::FPR16CRegClass);
20956 if (Subtarget.hasStdExtZhinxmin())
20957 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
20958 } else if (VT == MVT::f32) {
20959 if (Subtarget.hasStdExtF())
20960 return std::make_pair(0U, &RISCV::FPR32CRegClass);
20961 if (Subtarget.hasStdExtZfinx())
20962 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
20963 } else if (VT == MVT::f64) {
20964 if (Subtarget.hasStdExtD())
20965 return std::make_pair(0U, &RISCV::FPR64CRegClass);
20966 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
20967 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
20968 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
20969 return std::make_pair(0U, &RISCV::GPRCRegClass);
20970 }
20971 }
20972
20973 // Clang will correctly decode the usage of register name aliases into their
20974 // official names. However, other frontends like `rustc` do not. This allows
20975 // users of these frontends to use the ABI names for registers in LLVM-style
20976 // register constraints.
20977 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
20978 .Case("{zero}", RISCV::X0)
20979 .Case("{ra}", RISCV::X1)
20980 .Case("{sp}", RISCV::X2)
20981 .Case("{gp}", RISCV::X3)
20982 .Case("{tp}", RISCV::X4)
20983 .Case("{t0}", RISCV::X5)
20984 .Case("{t1}", RISCV::X6)
20985 .Case("{t2}", RISCV::X7)
20986 .Cases("{s0}", "{fp}", RISCV::X8)
20987 .Case("{s1}", RISCV::X9)
20988 .Case("{a0}", RISCV::X10)
20989 .Case("{a1}", RISCV::X11)
20990 .Case("{a2}", RISCV::X12)
20991 .Case("{a3}", RISCV::X13)
20992 .Case("{a4}", RISCV::X14)
20993 .Case("{a5}", RISCV::X15)
20994 .Case("{a6}", RISCV::X16)
20995 .Case("{a7}", RISCV::X17)
20996 .Case("{s2}", RISCV::X18)
20997 .Case("{s3}", RISCV::X19)
20998 .Case("{s4}", RISCV::X20)
20999 .Case("{s5}", RISCV::X21)
21000 .Case("{s6}", RISCV::X22)
21001 .Case("{s7}", RISCV::X23)
21002 .Case("{s8}", RISCV::X24)
21003 .Case("{s9}", RISCV::X25)
21004 .Case("{s10}", RISCV::X26)
21005 .Case("{s11}", RISCV::X27)
21006 .Case("{t3}", RISCV::X28)
21007 .Case("{t4}", RISCV::X29)
21008 .Case("{t5}", RISCV::X30)
21009 .Case("{t6}", RISCV::X31)
21010 .Default(RISCV::NoRegister);
21011 if (XRegFromAlias != RISCV::NoRegister)
21012 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
21013
21014 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
21015 // TableGen record rather than the AsmName to choose registers for InlineAsm
21016 // constraints, and since we want to match those names to the widest floating
21017 // point register type available, manually select floating point registers here.
21018 //
21019 // The second case is the ABI name of the register, so that frontends can also
21020 // use the ABI names in register constraint lists.
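// e.g. both "{f10}" and "{fa0}" in a constraint string select the same
// physical register (fa0) here.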
21021 if (Subtarget.hasStdExtF()) {
21022 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
21023 .Cases("{f0}", "{ft0}", RISCV::F0_F)
21024 .Cases("{f1}", "{ft1}", RISCV::F1_F)
21025 .Cases("{f2}", "{ft2}", RISCV::F2_F)
21026 .Cases("{f3}", "{ft3}", RISCV::F3_F)
21027 .Cases("{f4}", "{ft4}", RISCV::F4_F)
21028 .Cases("{f5}", "{ft5}", RISCV::F5_F)
21029 .Cases("{f6}", "{ft6}", RISCV::F6_F)
21030 .Cases("{f7}", "{ft7}", RISCV::F7_F)
21031 .Cases("{f8}", "{fs0}", RISCV::F8_F)
21032 .Cases("{f9}", "{fs1}", RISCV::F9_F)
21033 .Cases("{f10}", "{fa0}", RISCV::F10_F)
21034 .Cases("{f11}", "{fa1}", RISCV::F11_F)
21035 .Cases("{f12}", "{fa2}", RISCV::F12_F)
21036 .Cases("{f13}", "{fa3}", RISCV::F13_F)
21037 .Cases("{f14}", "{fa4}", RISCV::F14_F)
21038 .Cases("{f15}", "{fa5}", RISCV::F15_F)
21039 .Cases("{f16}", "{fa6}", RISCV::F16_F)
21040 .Cases("{f17}", "{fa7}", RISCV::F17_F)