1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
43#include "llvm/Support/Debug.h"
49#include <optional>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "riscv-lower"
54
55STATISTIC(NumTailCalls, "Number of tail calls");
56
57static cl::opt<unsigned> ExtensionMaxWebSize(
58    DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
59 cl::desc("Give the maximum size (in number of nodes) of the web of "
60 "instructions that we will consider for VW expansion"),
61 cl::init(18));
62
63static cl::opt<bool>
64 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
65 cl::desc("Allow the formation of VW_W operations (e.g., "
66 "VWADD_W) with splat constants"),
67 cl::init(false));
68
69static cl::opt<unsigned> NumRepeatedDivisors(
70    DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
71 cl::desc("Set the minimum number of repetitions of a divisor to allow "
72 "transformation to multiplications by the reciprocal"),
73 cl::init(2));
74
75static cl::opt<int>
76    FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
77 cl::desc("Give the maximum number of instructions that we will "
78 "use for creating a floating-point immediate value"),
79 cl::init(2));
80
81RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
82                                         const RISCVSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84
85 RISCVABI::ABI ABI = Subtarget.getTargetABI();
86 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
87
88 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
89 !Subtarget.hasStdExtF()) {
90 errs() << "Hard-float 'f' ABI can't be used for a target that "
91 "doesn't support the F instruction set extension (ignoring "
92 "target-abi)\n";
94 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
95 !Subtarget.hasStdExtD()) {
96 errs() << "Hard-float 'd' ABI can't be used for a target that "
97 "doesn't support the D instruction set extension (ignoring "
98 "target-abi)\n";
100 }
101
102 switch (ABI) {
103 default:
104 report_fatal_error("Don't know how to lower this ABI");
113 break;
114 }
115
116 MVT XLenVT = Subtarget.getXLenVT();
117
118 // Set up the register classes.
119 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
120
121 if (Subtarget.hasStdExtZfhmin())
122 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtZfbfmin())
124 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
125 if (Subtarget.hasStdExtF())
126 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
127 if (Subtarget.hasStdExtD())
128 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
129 if (Subtarget.hasStdExtZhinxmin())
130 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
131 if (Subtarget.hasStdExtZfinx())
132 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
133 if (Subtarget.hasStdExtZdinx()) {
134 if (Subtarget.is64Bit())
135 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
136 else
137 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
138 }
139
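  // Naming note: MVT::nxv<N><ty> denotes <vscale x N x ty>, i.e. a scalable vector
  // with a minimum of N elements of type ty. The riscv_nxv<N>i8x<K> tuple types below
  // model register groups of K fields, as used by the segment load/store
  // (vlseg/vsseg) intrinsics.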
140 static const MVT::SimpleValueType BoolVecVTs[] = {
141 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
142 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
143 static const MVT::SimpleValueType IntVecVTs[] = {
144 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
145 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
146 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
147 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
148 MVT::nxv4i64, MVT::nxv8i64};
149 static const MVT::SimpleValueType F16VecVTs[] = {
150 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
151 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
152 static const MVT::SimpleValueType BF16VecVTs[] = {
153 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
154 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
155 static const MVT::SimpleValueType F32VecVTs[] = {
156 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
157 static const MVT::SimpleValueType F64VecVTs[] = {
158 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
159 static const MVT::SimpleValueType VecTupleVTs[] = {
160 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
161 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
162 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
163 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
164 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
165 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
166 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
167 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
168 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
169 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
170 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
171
172 if (Subtarget.hasVInstructions()) {
173 auto addRegClassForRVV = [this](MVT VT) {
174 // Disable the smallest fractional LMUL types if ELEN is less than
175 // RVVBitsPerBlock.
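      // For example, with ELEN=32 this gives MinElts = 64/32 = 2, so the nxv1* types
      // (which would need a smaller fractional LMUL than ELEN=32 supports) get no
      // register class and stay illegal.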
176 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
177 if (VT.getVectorMinNumElements() < MinElts)
178 return;
179
180 unsigned Size = VT.getSizeInBits().getKnownMinValue();
181 const TargetRegisterClass *RC;
183 RC = &RISCV::VRRegClass;
184 else if (Size == 2 * RISCV::RVVBitsPerBlock)
185 RC = &RISCV::VRM2RegClass;
186 else if (Size == 4 * RISCV::RVVBitsPerBlock)
187 RC = &RISCV::VRM4RegClass;
188 else if (Size == 8 * RISCV::RVVBitsPerBlock)
189 RC = &RISCV::VRM8RegClass;
190 else
191 llvm_unreachable("Unexpected size");
192
193 addRegisterClass(VT, RC);
194 };
195
196 for (MVT VT : BoolVecVTs)
197 addRegClassForRVV(VT);
198 for (MVT VT : IntVecVTs) {
199 if (VT.getVectorElementType() == MVT::i64 &&
200 !Subtarget.hasVInstructionsI64())
201 continue;
202 addRegClassForRVV(VT);
203 }
204
205 if (Subtarget.hasVInstructionsF16Minimal())
206 for (MVT VT : F16VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsBF16Minimal())
210 for (MVT VT : BF16VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.hasVInstructionsF32())
214 for (MVT VT : F32VecVTs)
215 addRegClassForRVV(VT);
216
217 if (Subtarget.hasVInstructionsF64())
218 for (MVT VT : F64VecVTs)
219 addRegClassForRVV(VT);
220
221 if (Subtarget.useRVVForFixedLengthVectors()) {
222 auto addRegClassForFixedVectors = [this](MVT VT) {
223 MVT ContainerVT = getContainerForFixedLengthVector(VT);
224 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
225 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
226 addRegisterClass(VT, TRI.getRegClass(RCID));
227 };
229 if (useRVVForFixedLengthVectorVT(VT))
230 addRegClassForFixedVectors(VT);
231
233 if (useRVVForFixedLengthVectorVT(VT))
234 addRegClassForFixedVectors(VT);
235 }
236
237 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
238 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
239 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
240 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
241 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
242 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
243 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
244 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
245 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
246 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
247 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
248 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
249 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
250 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
251 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
252 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
253 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
254 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
255 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
256 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
257 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
258 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
259 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
260 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
261 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
262 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
263 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
264 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
265 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
266 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
267 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
268 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
269 }
270
271 // Compute derived properties from the register classes.
273
275
277 MVT::i1, Promote);
278 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
280 MVT::i1, Promote);
281
282 // TODO: add all necessary setOperationAction calls.
284
289
294 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
297 }
298
300
303
304 if (!Subtarget.hasVendorXTHeadBb())
306
308
309 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
310 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
311 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
312
313 if (Subtarget.is64Bit()) {
315
318 MVT::i32, Custom);
320 if (!Subtarget.hasStdExtZbb())
323 Custom);
325 }
326 if (!Subtarget.hasStdExtZmmul()) {
328 } else if (Subtarget.is64Bit()) {
331 } else {
333 }
334
335 if (!Subtarget.hasStdExtM()) {
337 Expand);
338 } else if (Subtarget.is64Bit()) {
340 {MVT::i8, MVT::i16, MVT::i32}, Custom);
341 }
342
345 Expand);
346
348 Custom);
349
350 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
351 if (Subtarget.is64Bit())
353 } else if (Subtarget.hasVendorXTHeadBb()) {
354 if (Subtarget.is64Bit())
357 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
359 } else {
361 }
362
363 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
364 // pattern match it directly in isel.
366 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367 Subtarget.hasVendorXTHeadBb())
368 ? Legal
369 : Expand);
370
371 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
373 } else {
374 // Zbkb can use rev8+brev8 to implement bitreverse.
376 Subtarget.hasStdExtZbkb() ? Custom : Expand);
377 }
378
379 if (Subtarget.hasStdExtZbb() ||
380 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
382 Legal);
383 }
384
385 if (Subtarget.hasStdExtZbb() ||
386 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
387 if (Subtarget.is64Bit())
389 } else {
391 }
392
393 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
394 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
395 // We need the custom lowering to make sure that the resulting sequence
396 // for the 32bit case is efficient on 64bit targets.
397 if (Subtarget.is64Bit())
399 } else {
401 }
402
403 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
405 } else if (Subtarget.hasShortForwardBranchOpt()) {
406 // We can use PseudoCCSUB to implement ABS.
408 } else if (Subtarget.is64Bit()) {
410 }
411
412 if (Subtarget.useCCMovInsn())
414 else if (!Subtarget.hasVendorXTHeadCondMov())
416
417 static const unsigned FPLegalNodeTypes[] = {
425
426 static const ISD::CondCode FPCCToExpand[] = {
430
431 static const unsigned FPOpToExpand[] = {
433 ISD::FREM};
434
435 static const unsigned FPRndMode[] = {
438
439 static const unsigned ZfhminZfbfminPromoteOps[] = {
449
450 if (Subtarget.hasStdExtZfbfmin()) {
456 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
463 }
464
465 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
466 if (Subtarget.hasStdExtZfhOrZhinx()) {
467 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
468 setOperationAction(FPRndMode, MVT::f16,
469 Subtarget.hasStdExtZfa() ? Legal : Custom);
472 Subtarget.hasStdExtZfa() ? Legal : Custom);
473 if (Subtarget.hasStdExtZfa())
475 } else {
476 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
481 setOperationAction(Op, MVT::f16, Custom);
487 }
488
490
493 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
497
499 ISD::FNEARBYINT, MVT::f16,
500 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
505 MVT::f16, Promote);
506
507 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
508 // complete support for all operations in LegalizeDAG.
513 MVT::f16, Promote);
514
515 // We need to custom promote this.
516 if (Subtarget.is64Bit())
518 }
519
520 if (Subtarget.hasStdExtFOrZfinx()) {
521 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
522 setOperationAction(FPRndMode, MVT::f32,
523 Subtarget.hasStdExtZfa() ? Legal : Custom);
524 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
528 setOperationAction(FPOpToExpand, MVT::f32, Expand);
529 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
530 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
531 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
532 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
536 Subtarget.isSoftFPABI() ? LibCall : Custom);
541
542 if (Subtarget.hasStdExtZfa()) {
546 } else {
548 }
549 }
550
551 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
553
554 if (Subtarget.hasStdExtDOrZdinx()) {
555 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
556
557 if (!Subtarget.is64Bit())
559
560 if (Subtarget.hasStdExtZfa()) {
562 setOperationAction(FPRndMode, MVT::f64, Legal);
565 } else {
566 if (Subtarget.is64Bit())
567 setOperationAction(FPRndMode, MVT::f64, Custom);
568
570 }
571
574 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
578 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
579 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
580 setOperationAction(FPOpToExpand, MVT::f64, Expand);
581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
582 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
583 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
584 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
588 Subtarget.isSoftFPABI() ? LibCall : Custom);
593 }
594
595 if (Subtarget.is64Bit()) {
598 MVT::i32, Custom);
600 }
601
602 if (Subtarget.hasStdExtFOrZfinx()) {
604 Custom);
605
606 // f16/bf16 require custom handling.
608 Custom);
610 Custom);
611
614 }
615
618 XLenVT, Custom);
619
621
622 if (Subtarget.is64Bit())
624
625 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
626 // Unfortunately this can't be determined just from the ISA naming string.
628 Subtarget.is64Bit() ? Legal : Custom);
630 Subtarget.is64Bit() ? Legal : Custom);
631
632 if (Subtarget.is64Bit()) {
635 }
636
639 if (Subtarget.is64Bit())
641
642 if (Subtarget.hasStdExtZicbop()) {
644 }
645
646 if (Subtarget.hasStdExtA()) {
648 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
650 else
652 } else if (Subtarget.hasForcedAtomics()) {
654 } else {
656 }
657
659
661
662 if (getTargetMachine().getTargetTriple().isOSLinux()) {
663 // Custom lowering of llvm.clear_cache.
665 }
666
667 if (Subtarget.hasVInstructions()) {
669
671
672 // RVV intrinsics may have illegal operands.
673 // We also need to custom legalize vmv.x.s.
676 {MVT::i8, MVT::i16}, Custom);
677 if (Subtarget.is64Bit())
679 MVT::i32, Custom);
680 else
682 MVT::i64, Custom);
683
685 MVT::Other, Custom);
686
687 static const unsigned IntegerVPOps[] = {
688 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
689 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
690 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
691 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
692 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
693 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
694 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
695 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
696 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
697 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
698 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
699 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
700 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
701 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
702 ISD::EXPERIMENTAL_VP_SPLAT};
703
704 static const unsigned FloatingPointVPOps[] = {
705 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
706 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
707 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
708 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
709 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
710 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
711 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
712 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
713 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
714 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
715 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
716 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
717 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
718 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
719
720 static const unsigned IntegerVecReduceOps[] = {
724
725 static const unsigned FloatingPointVecReduceOps[] = {
728
729 static const unsigned FloatingPointLibCallOps[] = {
732
733 if (!Subtarget.is64Bit()) {
734 // We must custom-lower certain vXi64 operations on RV32 due to the vector
735 // element type being illegal.
737 MVT::i64, Custom);
738
739 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
740
741 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
742 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
743 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
744 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
745 MVT::i64, Custom);
746 }
747
748 for (MVT VT : BoolVecVTs) {
749 if (!isTypeLegal(VT))
750 continue;
751
753
754 // Mask VTs are custom-expanded into a series of standard nodes
758 VT, Custom);
759
761 Custom);
762
764 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
765 Expand);
766 setOperationAction(ISD::VP_MERGE, VT, Custom);
767
768 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
769 Custom);
770
771 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
772
775 Custom);
776
778 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
779 Custom);
780
781 // RVV has native int->float & float->int conversions where the
782 // element type sizes are within one power-of-two of each other. Any
783 // wider distances between type sizes have to be lowered as sequences
784 // which progressively narrow the gap in stages.
789 VT, Custom);
791 Custom);
792
793 // Expand all extending loads to types larger than this, and truncating
794 // stores from types larger than this.
796 setTruncStoreAction(VT, OtherVT, Expand);
798 OtherVT, Expand);
799 }
800
801 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
802 ISD::VP_TRUNCATE, ISD::VP_SETCC},
803 VT, Custom);
804
807
809
810 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
811 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
812
815 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
816 }
817
818 for (MVT VT : IntVecVTs) {
819 if (!isTypeLegal(VT))
820 continue;
821
824
825 // Vectors implement MULHS/MULHU.
827
828 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
829 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
831
833 Legal);
834
836
837 // Custom-lower extensions and truncations from/to mask types.
839 VT, Custom);
840
841 // RVV has native int->float & float->int conversions where the
842 // element type sizes are within one power-of-two of each other. Any
843 // wider distances between type sizes have to be lowered as sequences
844 // which progressively narrow the gap in stages.
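      // For example, an nxv2i8 -> nxv2f32 conversion (a 4x element-size gap) cannot
      // use a single native convert; the custom lowering widens through an
      // intermediate step before the final conversion.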
849 VT, Custom);
851 Custom);
855 VT, Legal);
856
857 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
858 // nodes which truncate by one power of two at a time.
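      // For example, truncating nxv2i64 to nxv2i8 becomes three TRUNCATE_VECTOR_VL
      // steps (i64->i32->i16->i8), each selected to a narrowing shift (vnsrl) by zero.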
861 Custom);
862
863 // Custom-lower insert/extract operations to simplify patterns.
865 Custom);
866
867 // Custom-lower reduction operations to set up the corresponding custom
868 // nodes' operands.
869 setOperationAction(IntegerVecReduceOps, VT, Custom);
870
871 setOperationAction(IntegerVPOps, VT, Custom);
872
874
876 VT, Custom);
877
879 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
880 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
881 VT, Custom);
882
885 VT, Custom);
886
889
891
893 setTruncStoreAction(VT, OtherVT, Expand);
895 OtherVT, Expand);
896 }
897
900
901 // Splice
903
904 if (Subtarget.hasStdExtZvkb()) {
906 setOperationAction(ISD::VP_BSWAP, VT, Custom);
907 } else {
908 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
910 }
911
912 if (Subtarget.hasStdExtZvbb()) {
914 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
915 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
916 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
917 VT, Custom);
918 } else {
919 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
921 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
922 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
923 VT, Expand);
924
925 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
926 // range of f32.
927 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
928 if (isTypeLegal(FloatVT)) {
930 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
931 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
932 VT, Custom);
933 }
934 }
935
937 }
938
939 for (MVT VT : VecTupleVTs) {
940 if (!isTypeLegal(VT))
941 continue;
942
944 }
945
946 // Expand various CCs to best match the RVV ISA, which natively supports UNE
947 // but no other unordered comparisons, and supports all ordered comparisons
948 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
949 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
950 // and we pattern-match those back to the "original", swapping operands once
951 // more. This way we catch both operations and both "vf" and "fv" forms with
952 // fewer patterns.
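    // For example, ISD::SETOGT is expanded to ISD::SETOLT with the operands swapped;
    // isel then recognises the swapped form, so a single set of patterns covers
    // GT/GE/OGT/OGE as well as the LT/LE comparisons.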
953 static const ISD::CondCode VFPCCToExpand[] = {
957 };
958
959 // TODO: support more ops.
960 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
968
969 // TODO: support more vp ops.
970 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
971 ISD::VP_FADD,
972 ISD::VP_FSUB,
973 ISD::VP_FMUL,
974 ISD::VP_FDIV,
975 ISD::VP_FMA,
976 ISD::VP_REDUCE_FMIN,
977 ISD::VP_REDUCE_FMAX,
978 ISD::VP_SQRT,
979 ISD::VP_FMINNUM,
980 ISD::VP_FMAXNUM,
981 ISD::VP_FCEIL,
982 ISD::VP_FFLOOR,
983 ISD::VP_FROUND,
984 ISD::VP_FROUNDEVEN,
985 ISD::VP_FROUNDTOZERO,
986 ISD::VP_FRINT,
987 ISD::VP_FNEARBYINT,
988 ISD::VP_SETCC,
989 ISD::VP_FMINIMUM,
990 ISD::VP_FMAXIMUM,
991 ISD::VP_REDUCE_FMINIMUM,
992 ISD::VP_REDUCE_FMAXIMUM};
993
994 // Sets common operation actions on RVV floating-point vector types.
995 const auto SetCommonVFPActions = [&](MVT VT) {
997 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
998 // sizes are within one power-of-two of each other. Therefore conversions
999 // between vXf16 and vXf64 must be lowered as sequences which convert via
1000 // vXf32.
1003 // Custom-lower insert/extract operations to simplify patterns.
1005 Custom);
1006 // Expand various condition codes (explained above).
1007 setCondCodeAction(VFPCCToExpand, VT, Expand);
1008
1011
1015 VT, Custom);
1016
1017 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1018
1019 // Expand FP operations that need libcalls.
1020 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1021
1023
1025
1027 VT, Custom);
1028
1030 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1031 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1032 VT, Custom);
1033
1036
1039 VT, Custom);
1040
1043
1045
1046 setOperationAction(FloatingPointVPOps, VT, Custom);
1047
1049 Custom);
1052 VT, Legal);
1057 VT, Custom);
1058
1060 };
1061
1062 // Sets common extload/truncstore actions on RVV floating-point vector
1063 // types.
1064 const auto SetCommonVFPExtLoadTruncStoreActions =
1065 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1066 for (auto SmallVT : SmallerVTs) {
1067 setTruncStoreAction(VT, SmallVT, Expand);
1068 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1069 }
1070 };
1071
1072 // Sets common actions for f16 and bf16 for when there's only
1073 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1074 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1077 Custom);
1078 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1079 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1080 Custom);
1082 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1088 VT, Custom);
1089 MVT EltVT = VT.getVectorElementType();
1090 if (isTypeLegal(EltVT))
1091 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1093 VT, Custom);
1094 else
1095 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1096 EltVT, Custom);
1098 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1099 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1100 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1101 ISD::VP_SCATTER},
1102 VT, Custom);
1103
1107
1108 // Expand FP operations that need libcalls.
1109 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1110
1111 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
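      // nxv32f16/nxv32bf16 already occupy LMUL=8, so promoting to f32 would need an
      // (illegal) LMUL=16 type; those operations are instead custom-lowered by
      // splitting into two LMUL=4 halves.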
1112 if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1113 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1114 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1115 } else {
1116 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1117 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1118 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1119 }
1120 };
1121
1122 if (Subtarget.hasVInstructionsF16()) {
1123 for (MVT VT : F16VecVTs) {
1124 if (!isTypeLegal(VT))
1125 continue;
1126 SetCommonVFPActions(VT);
1127 }
1128 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1129 for (MVT VT : F16VecVTs) {
1130 if (!isTypeLegal(VT))
1131 continue;
1132 SetCommonPromoteToF32Actions(VT);
1133 }
1134 }
1135
1136 if (Subtarget.hasVInstructionsBF16Minimal()) {
1137 for (MVT VT : BF16VecVTs) {
1138 if (!isTypeLegal(VT))
1139 continue;
1140 SetCommonPromoteToF32Actions(VT);
1141 }
1142 }
1143
1144 if (Subtarget.hasVInstructionsF32()) {
1145 for (MVT VT : F32VecVTs) {
1146 if (!isTypeLegal(VT))
1147 continue;
1148 SetCommonVFPActions(VT);
1149 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1150 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1151 }
1152 }
1153
1154 if (Subtarget.hasVInstructionsF64()) {
1155 for (MVT VT : F64VecVTs) {
1156 if (!isTypeLegal(VT))
1157 continue;
1158 SetCommonVFPActions(VT);
1159 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1160 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1161 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1162 }
1163 }
1164
1165 if (Subtarget.useRVVForFixedLengthVectors()) {
1167 if (!useRVVForFixedLengthVectorVT(VT))
1168 continue;
1169
1170 // By default everything must be expanded.
1171 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1174 setTruncStoreAction(VT, OtherVT, Expand);
1176 OtherVT, Expand);
1177 }
1178
1179 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1180 // expansion to a build_vector of 0s.
1182
1183 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1185 Custom);
1186
1189 Custom);
1190
1192 VT, Custom);
1193
1195
1197
1199
1201
1204 Custom);
1205
1207
1210 Custom);
1211
1213 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1214 Custom);
1215
1217 {
1226 },
1227 VT, Custom);
1229 Custom);
1230
1232
1233 // Operations below are different for between masks and other vectors.
1234 if (VT.getVectorElementType() == MVT::i1) {
1235 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1236 ISD::OR, ISD::XOR},
1237 VT, Custom);
1238
1239 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1240 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1241 VT, Custom);
1242
1243 setOperationAction(ISD::VP_MERGE, VT, Custom);
1244
1245 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1246 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1247 continue;
1248 }
1249
1250 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1251 // it before type legalization for i64 vectors on RV32. It will then be
1252 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1253 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1254 // improvements first.
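      // After type legalization the i64 splat becomes SPLAT_VECTOR_PARTS, taking the
      // low and high 32-bit halves as separate operands, and the custom lowering
      // rebuilds the 64-bit splat from those halves.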
1255 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1258 }
1259
1262
1263 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1264 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1265 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1266 ISD::VP_SCATTER},
1267 VT, Custom);
1268
1272 VT, Custom);
1273
1276
1278
1279 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1280 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1282
1286 VT, Custom);
1287
1289
1292
1293 // Custom-lower reduction operations to set up the corresponding custom
1294 // nodes' operands.
1298 VT, Custom);
1299
1300 setOperationAction(IntegerVPOps, VT, Custom);
1301
1302 if (Subtarget.hasStdExtZvkb())
1304
1305 if (Subtarget.hasStdExtZvbb()) {
1308 VT, Custom);
1309 } else {
1310 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1311 // range of f32.
1312 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1313 if (isTypeLegal(FloatVT))
1316 Custom);
1317 }
1318
1320 }
1321
1323 // There are no extending loads or truncating stores.
1324 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1325 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1326 setTruncStoreAction(VT, InnerVT, Expand);
1327 }
1328
1329 if (!useRVVForFixedLengthVectorVT(VT))
1330 continue;
1331
1332 // By default everything must be expanded.
1333 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1335
1336 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1337 // expansion to a build_vector of 0s.
1339
1344 VT, Custom);
1345
1348 VT, Custom);
1349 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1350 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1351 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1352 VT, Custom);
1353
1356 Custom);
1357
1358 if (VT.getVectorElementType() == MVT::f16 &&
1359 !Subtarget.hasVInstructionsF16()) {
1361 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1363 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1364 Custom);
1365 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1366 Custom);
1367 if (Subtarget.hasStdExtZfhmin()) {
1369 } else {
1370 // We need to custom legalize f16 build vectors if Zfhmin isn't
1371 // available.
1373 }
1377 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1378 // Don't promote f16 vector operations to f32 if f32 vector type is
1379 // not legal.
1380 // TODO: could split the f16 vector into two vectors and do promotion.
1381 if (!isTypeLegal(F32VecVT))
1382 continue;
1383 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1384 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1385 continue;
1386 }
1387
1388 if (VT.getVectorElementType() == MVT::bf16) {
1390 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1391 if (Subtarget.hasStdExtZfbfmin()) {
1393 } else {
1394 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1395 // available.
1397 }
1399 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1400 Custom);
1401 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1402        // Don't promote bf16 vector operations to f32 if the f32 vector type is
1403        // not legal.
1404        // TODO: could split the bf16 vector into two vectors and do promotion.
1405 if (!isTypeLegal(F32VecVT))
1406 continue;
1407 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1408 // TODO: Promote VP ops to fp32.
1409 continue;
1410 }
1411
1413 Custom);
1414
1419 VT, Custom);
1420
1423 VT, Custom);
1424
1425 setCondCodeAction(VFPCCToExpand, VT, Expand);
1426
1429
1431
1432 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1433
1434 setOperationAction(FloatingPointVPOps, VT, Custom);
1435
1442 VT, Custom);
1443 }
1444
1445 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1446 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1447 if (Subtarget.is64Bit())
1449 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1451 if (Subtarget.hasStdExtZfbfmin())
1453 if (Subtarget.hasStdExtFOrZfinx())
1455 if (Subtarget.hasStdExtDOrZdinx())
1457 }
1458 }
1459
1460 if (Subtarget.hasStdExtA())
1462
1463 if (Subtarget.hasForcedAtomics()) {
1464 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1470 XLenVT, LibCall);
1471 }
1472
1473 if (Subtarget.hasVendorXTHeadMemIdx()) {
1474 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1475 setIndexedLoadAction(im, MVT::i8, Legal);
1476 setIndexedStoreAction(im, MVT::i8, Legal);
1477 setIndexedLoadAction(im, MVT::i16, Legal);
1478 setIndexedStoreAction(im, MVT::i16, Legal);
1479 setIndexedLoadAction(im, MVT::i32, Legal);
1480 setIndexedStoreAction(im, MVT::i32, Legal);
1481
1482 if (Subtarget.is64Bit()) {
1483 setIndexedLoadAction(im, MVT::i64, Legal);
1484 setIndexedStoreAction(im, MVT::i64, Legal);
1485 }
1486 }
1487 }
1488
1489 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1493
1497 }
1498
1499 // Function alignments.
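  // With the compressed (C/Zca) extension instructions can be 2-byte aligned;
  // otherwise the base 4-byte instruction alignment applies.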
1500 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1501 setMinFunctionAlignment(FunctionAlignment);
1502 // Set preferred alignments.
1505
1511
1512 if (Subtarget.hasStdExtFOrZfinx())
1514
1515 if (Subtarget.hasStdExtZbb())
1517
1518 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1519 Subtarget.hasVInstructions())
1521
1522 if (Subtarget.hasStdExtZbkb())
1524
1525 if (Subtarget.hasStdExtFOrZfinx())
1528 if (Subtarget.hasVInstructions())
1530 ISD::MSCATTER, ISD::VP_GATHER,
1531 ISD::VP_SCATTER, ISD::SRA,
1535 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_REVERSE,
1541
1542 if (Subtarget.hasVendorXTHeadMemPair())
1544 if (Subtarget.useRVVForFixedLengthVectors())
1546
1547 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1548 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1549
1550 // Disable strict node mutation.
1551 IsStrictFPEnabled = true;
1552 EnableExtLdPromotion = true;
1553
1554 // Let the subtarget decide if a predictable select is more expensive than the
1555 // corresponding branch. This information is used in CGP/SelectOpt to decide
1556 // when to convert selects into branches.
1557 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1558
1559 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1560 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1561
1563 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1564 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1565
1567 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1568 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1569
1570 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1571 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1572}
1573
1574EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1575                                            LLVMContext &Context,
1576 EVT VT) const {
1577 if (!VT.isVector())
1578 return getPointerTy(DL);
1579 if (Subtarget.hasVInstructions() &&
1580 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1581 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1582  return VT.changeVectorElementTypeToInteger();
1583}
1584
1585MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1586 return Subtarget.getXLenVT();
1587}
1588
1589// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1590bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1591 unsigned VF,
1592 bool IsScalable) const {
1593 if (!Subtarget.hasVInstructions())
1594 return true;
1595
1596 if (!IsScalable)
1597 return true;
1598
1599 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1600 return true;
1601
1602  // Don't allow VF=1 if those types aren't legal.
1603 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1604 return true;
1605
1606 // VLEN=32 support is incomplete.
1607 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1608 return true;
1609
1610 // The maximum VF is for the smallest element width with LMUL=8.
1611 // VF must be a power of 2.
1612 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
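  // With RVVBitsPerBlock = 64 this gives MaxVF = (64 / 8) * 8 = 64 (SEW=8, LMUL=8).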
1613 return VF > MaxVF || !isPowerOf2_32(VF);
1614}
1615
1617 return !Subtarget.hasVInstructions() ||
1618 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1619}
1620
1621bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1622                                             const CallInst &I,
1623 MachineFunction &MF,
1624 unsigned Intrinsic) const {
1625 auto &DL = I.getDataLayout();
1626
1627 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1628 bool IsUnitStrided, bool UsePtrVal = false) {
1630 // We can't use ptrVal if the intrinsic can access memory before the
1631 // pointer. This means we can't use it for strided or indexed intrinsics.
1632 if (UsePtrVal)
1633 Info.ptrVal = I.getArgOperand(PtrOp);
1634 else
1635 Info.fallbackAddressSpace =
1636 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1637 Type *MemTy;
1638 if (IsStore) {
1639 // Store value is the first operand.
1640 MemTy = I.getArgOperand(0)->getType();
1641 } else {
1642 // Use return type. If it's segment load, return type is a struct.
1643 MemTy = I.getType();
1644 if (MemTy->isStructTy())
1645 MemTy = MemTy->getStructElementType(0);
1646 }
1647 if (!IsUnitStrided)
1648 MemTy = MemTy->getScalarType();
1649
1650 Info.memVT = getValueType(DL, MemTy);
1651 if (MemTy->isTargetExtTy()) {
1652 // RISC-V vector tuple type's alignment type should be its element type.
1653 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1654 MemTy = Type::getIntNTy(
1655 MemTy->getContext(),
1656 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1657 ->getZExtValue());
1658 Info.align = DL.getABITypeAlign(MemTy);
1659 } else {
1660 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1661 }
1663 Info.flags |=
1665 return true;
1666 };
1667
1668 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1670
1672 switch (Intrinsic) {
1673 default:
1674 return false;
1675 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1676 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1677 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1678 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1679 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1680 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1681 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1682 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1683 case Intrinsic::riscv_masked_cmpxchg_i32:
1685 Info.memVT = MVT::i32;
1686 Info.ptrVal = I.getArgOperand(0);
1687 Info.offset = 0;
1688 Info.align = Align(4);
1691 return true;
1692 case Intrinsic::riscv_seg2_load:
1693 case Intrinsic::riscv_seg3_load:
1694 case Intrinsic::riscv_seg4_load:
1695 case Intrinsic::riscv_seg5_load:
1696 case Intrinsic::riscv_seg6_load:
1697 case Intrinsic::riscv_seg7_load:
1698 case Intrinsic::riscv_seg8_load:
1699 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1700 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1701 case Intrinsic::riscv_seg2_store:
1702 case Intrinsic::riscv_seg3_store:
1703 case Intrinsic::riscv_seg4_store:
1704 case Intrinsic::riscv_seg5_store:
1705 case Intrinsic::riscv_seg6_store:
1706 case Intrinsic::riscv_seg7_store:
1707 case Intrinsic::riscv_seg8_store:
1708 // Operands are (vec, ..., vec, ptr, vl)
1709 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1710 /*IsStore*/ true,
1711 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1712 case Intrinsic::riscv_vle:
1713 case Intrinsic::riscv_vle_mask:
1714 case Intrinsic::riscv_vleff:
1715 case Intrinsic::riscv_vleff_mask:
1716 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1717 /*IsStore*/ false,
1718 /*IsUnitStrided*/ true,
1719 /*UsePtrVal*/ true);
1720 case Intrinsic::riscv_vse:
1721 case Intrinsic::riscv_vse_mask:
1722 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1723 /*IsStore*/ true,
1724 /*IsUnitStrided*/ true,
1725 /*UsePtrVal*/ true);
1726 case Intrinsic::riscv_vlse:
1727 case Intrinsic::riscv_vlse_mask:
1728 case Intrinsic::riscv_vloxei:
1729 case Intrinsic::riscv_vloxei_mask:
1730 case Intrinsic::riscv_vluxei:
1731 case Intrinsic::riscv_vluxei_mask:
1732 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1733 /*IsStore*/ false,
1734 /*IsUnitStrided*/ false);
1735 case Intrinsic::riscv_vsse:
1736 case Intrinsic::riscv_vsse_mask:
1737 case Intrinsic::riscv_vsoxei:
1738 case Intrinsic::riscv_vsoxei_mask:
1739 case Intrinsic::riscv_vsuxei:
1740 case Intrinsic::riscv_vsuxei_mask:
1741 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1742 /*IsStore*/ true,
1743 /*IsUnitStrided*/ false);
1744 case Intrinsic::riscv_vlseg2:
1745 case Intrinsic::riscv_vlseg3:
1746 case Intrinsic::riscv_vlseg4:
1747 case Intrinsic::riscv_vlseg5:
1748 case Intrinsic::riscv_vlseg6:
1749 case Intrinsic::riscv_vlseg7:
1750 case Intrinsic::riscv_vlseg8:
1751 case Intrinsic::riscv_vlseg2ff:
1752 case Intrinsic::riscv_vlseg3ff:
1753 case Intrinsic::riscv_vlseg4ff:
1754 case Intrinsic::riscv_vlseg5ff:
1755 case Intrinsic::riscv_vlseg6ff:
1756 case Intrinsic::riscv_vlseg7ff:
1757 case Intrinsic::riscv_vlseg8ff:
1758 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1759 /*IsStore*/ false,
1760 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1761 case Intrinsic::riscv_vlseg2_mask:
1762 case Intrinsic::riscv_vlseg3_mask:
1763 case Intrinsic::riscv_vlseg4_mask:
1764 case Intrinsic::riscv_vlseg5_mask:
1765 case Intrinsic::riscv_vlseg6_mask:
1766 case Intrinsic::riscv_vlseg7_mask:
1767 case Intrinsic::riscv_vlseg8_mask:
1768 case Intrinsic::riscv_vlseg2ff_mask:
1769 case Intrinsic::riscv_vlseg3ff_mask:
1770 case Intrinsic::riscv_vlseg4ff_mask:
1771 case Intrinsic::riscv_vlseg5ff_mask:
1772 case Intrinsic::riscv_vlseg6ff_mask:
1773 case Intrinsic::riscv_vlseg7ff_mask:
1774 case Intrinsic::riscv_vlseg8ff_mask:
1775 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1776 /*IsStore*/ false,
1777 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1778 case Intrinsic::riscv_vlsseg2:
1779 case Intrinsic::riscv_vlsseg3:
1780 case Intrinsic::riscv_vlsseg4:
1781 case Intrinsic::riscv_vlsseg5:
1782 case Intrinsic::riscv_vlsseg6:
1783 case Intrinsic::riscv_vlsseg7:
1784 case Intrinsic::riscv_vlsseg8:
1785 case Intrinsic::riscv_vloxseg2:
1786 case Intrinsic::riscv_vloxseg3:
1787 case Intrinsic::riscv_vloxseg4:
1788 case Intrinsic::riscv_vloxseg5:
1789 case Intrinsic::riscv_vloxseg6:
1790 case Intrinsic::riscv_vloxseg7:
1791 case Intrinsic::riscv_vloxseg8:
1792 case Intrinsic::riscv_vluxseg2:
1793 case Intrinsic::riscv_vluxseg3:
1794 case Intrinsic::riscv_vluxseg4:
1795 case Intrinsic::riscv_vluxseg5:
1796 case Intrinsic::riscv_vluxseg6:
1797 case Intrinsic::riscv_vluxseg7:
1798 case Intrinsic::riscv_vluxseg8:
1799 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1800 /*IsStore*/ false,
1801 /*IsUnitStrided*/ false);
1802 case Intrinsic::riscv_vlsseg2_mask:
1803 case Intrinsic::riscv_vlsseg3_mask:
1804 case Intrinsic::riscv_vlsseg4_mask:
1805 case Intrinsic::riscv_vlsseg5_mask:
1806 case Intrinsic::riscv_vlsseg6_mask:
1807 case Intrinsic::riscv_vlsseg7_mask:
1808 case Intrinsic::riscv_vlsseg8_mask:
1809 case Intrinsic::riscv_vloxseg2_mask:
1810 case Intrinsic::riscv_vloxseg3_mask:
1811 case Intrinsic::riscv_vloxseg4_mask:
1812 case Intrinsic::riscv_vloxseg5_mask:
1813 case Intrinsic::riscv_vloxseg6_mask:
1814 case Intrinsic::riscv_vloxseg7_mask:
1815 case Intrinsic::riscv_vloxseg8_mask:
1816 case Intrinsic::riscv_vluxseg2_mask:
1817 case Intrinsic::riscv_vluxseg3_mask:
1818 case Intrinsic::riscv_vluxseg4_mask:
1819 case Intrinsic::riscv_vluxseg5_mask:
1820 case Intrinsic::riscv_vluxseg6_mask:
1821 case Intrinsic::riscv_vluxseg7_mask:
1822 case Intrinsic::riscv_vluxseg8_mask:
1823 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1824 /*IsStore*/ false,
1825 /*IsUnitStrided*/ false);
1826 case Intrinsic::riscv_vsseg2:
1827 case Intrinsic::riscv_vsseg3:
1828 case Intrinsic::riscv_vsseg4:
1829 case Intrinsic::riscv_vsseg5:
1830 case Intrinsic::riscv_vsseg6:
1831 case Intrinsic::riscv_vsseg7:
1832 case Intrinsic::riscv_vsseg8:
1833 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1834 /*IsStore*/ true,
1835 /*IsUnitStrided*/ false);
1836 case Intrinsic::riscv_vsseg2_mask:
1837 case Intrinsic::riscv_vsseg3_mask:
1838 case Intrinsic::riscv_vsseg4_mask:
1839 case Intrinsic::riscv_vsseg5_mask:
1840 case Intrinsic::riscv_vsseg6_mask:
1841 case Intrinsic::riscv_vsseg7_mask:
1842 case Intrinsic::riscv_vsseg8_mask:
1843 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1844 /*IsStore*/ true,
1845 /*IsUnitStrided*/ false);
1846 case Intrinsic::riscv_vssseg2:
1847 case Intrinsic::riscv_vssseg3:
1848 case Intrinsic::riscv_vssseg4:
1849 case Intrinsic::riscv_vssseg5:
1850 case Intrinsic::riscv_vssseg6:
1851 case Intrinsic::riscv_vssseg7:
1852 case Intrinsic::riscv_vssseg8:
1853 case Intrinsic::riscv_vsoxseg2:
1854 case Intrinsic::riscv_vsoxseg3:
1855 case Intrinsic::riscv_vsoxseg4:
1856 case Intrinsic::riscv_vsoxseg5:
1857 case Intrinsic::riscv_vsoxseg6:
1858 case Intrinsic::riscv_vsoxseg7:
1859 case Intrinsic::riscv_vsoxseg8:
1860 case Intrinsic::riscv_vsuxseg2:
1861 case Intrinsic::riscv_vsuxseg3:
1862 case Intrinsic::riscv_vsuxseg4:
1863 case Intrinsic::riscv_vsuxseg5:
1864 case Intrinsic::riscv_vsuxseg6:
1865 case Intrinsic::riscv_vsuxseg7:
1866 case Intrinsic::riscv_vsuxseg8:
1867 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1868 /*IsStore*/ true,
1869 /*IsUnitStrided*/ false);
1870 case Intrinsic::riscv_vssseg2_mask:
1871 case Intrinsic::riscv_vssseg3_mask:
1872 case Intrinsic::riscv_vssseg4_mask:
1873 case Intrinsic::riscv_vssseg5_mask:
1874 case Intrinsic::riscv_vssseg6_mask:
1875 case Intrinsic::riscv_vssseg7_mask:
1876 case Intrinsic::riscv_vssseg8_mask:
1877 case Intrinsic::riscv_vsoxseg2_mask:
1878 case Intrinsic::riscv_vsoxseg3_mask:
1879 case Intrinsic::riscv_vsoxseg4_mask:
1880 case Intrinsic::riscv_vsoxseg5_mask:
1881 case Intrinsic::riscv_vsoxseg6_mask:
1882 case Intrinsic::riscv_vsoxseg7_mask:
1883 case Intrinsic::riscv_vsoxseg8_mask:
1884 case Intrinsic::riscv_vsuxseg2_mask:
1885 case Intrinsic::riscv_vsuxseg3_mask:
1886 case Intrinsic::riscv_vsuxseg4_mask:
1887 case Intrinsic::riscv_vsuxseg5_mask:
1888 case Intrinsic::riscv_vsuxseg6_mask:
1889 case Intrinsic::riscv_vsuxseg7_mask:
1890 case Intrinsic::riscv_vsuxseg8_mask:
1891 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1892 /*IsStore*/ true,
1893 /*IsUnitStrided*/ false);
1894 }
1895}
1896
1897bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1898                                                const AddrMode &AM, Type *Ty,
1899 unsigned AS,
1900 Instruction *I) const {
1901 // No global is ever allowed as a base.
1902 if (AM.BaseGV)
1903 return false;
1904
1905 // None of our addressing modes allows a scalable offset
1906 if (AM.ScalableOffset)
1907 return false;
1908
1909 // RVV instructions only support register addressing.
1910 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1911 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1912
1913 // Require a 12-bit signed offset.
1914 if (!isInt<12>(AM.BaseOffs))
1915 return false;
1916
1917 switch (AM.Scale) {
1918 case 0: // "r+i" or just "i", depending on HasBaseReg.
1919 break;
1920 case 1:
1921 if (!AM.HasBaseReg) // allow "r+i".
1922 break;
1923 return false; // disallow "r+r" or "r+r+i".
1924 default:
1925 return false;
1926 }
1927
1928 return true;
1929}
1930
1932 return isInt<12>(Imm);
1933}
1934
1936 return isInt<12>(Imm);
1937}
1938
1939// On RV32, 64-bit integers are split into their high and low parts and held
1940// in two different registers, so the trunc is free since the low register can
1941// just be used.
1942// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1943// isTruncateFree?
1945 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1946 return false;
1947 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1948 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1949 return (SrcBits == 64 && DestBits == 32);
1950}
1951
1953 // We consider i64->i32 free on RV64 since we have good selection of W
1954 // instructions that make promoting operations back to i64 free in many cases.
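  // For example, (trunc (add X, Y)) selects directly to addw on RV64, so the
  // truncate itself costs nothing.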
1955 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1956 !DstVT.isInteger())
1957 return false;
1958 unsigned SrcBits = SrcVT.getSizeInBits();
1959 unsigned DestBits = DstVT.getSizeInBits();
1960 return (SrcBits == 64 && DestBits == 32);
1961}
1962
1964 EVT SrcVT = Val.getValueType();
1965  // A truncate of an SRL/SRA result is free, since it folds into vnsrl/vnsra.
1966 if (Subtarget.hasVInstructions() &&
1967 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1968 SrcVT.isVector() && VT2.isVector()) {
1969 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1970 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1971 if (SrcBits == DestBits * 2) {
1972 return true;
1973 }
1974 }
1975 return TargetLowering::isTruncateFree(Val, VT2);
1976}
1977
1979 // Zexts are free if they can be combined with a load.
1980 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1981 // poorly with type legalization of compares preferring sext.
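  // For example, (zext (load i8)) selects to a single lbu, which zero-extends
  // for free.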
1982 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1983 EVT MemVT = LD->getMemoryVT();
1984 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1985 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1986 LD->getExtensionType() == ISD::ZEXTLOAD))
1987 return true;
1988 }
1989
1990 return TargetLowering::isZExtFree(Val, VT2);
1991}
1992
1994 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1995}
1996
1998 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1999}
2000
2002 return Subtarget.hasStdExtZbb() ||
2003 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2004}
2005
2007 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2008 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2009}
2010
2012 const Instruction &AndI) const {
2013 // We expect to be able to match a bit extraction instruction if the Zbs
2014 // extension is supported and the mask is a power of two. However, we
2015 // conservatively return false if the mask would fit in an ANDI instruction,
2016 // on the basis that it's possible the sinking+duplication of the AND in
2017 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2018 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2019 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2020 return false;
2021 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2022 if (!Mask)
2023 return false;
2024 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2025}
2026
2028 EVT VT = Y.getValueType();
2029
2030 // FIXME: Support vectors once we have tests.
2031 if (VT.isVector())
2032 return false;
2033
2034 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2035 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2036}
2037
2039 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2040 if (Subtarget.hasStdExtZbs())
2041 return X.getValueType().isScalarInteger();
2042 auto *C = dyn_cast<ConstantSDNode>(Y);
2043 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2044 if (Subtarget.hasVendorXTHeadBs())
2045 return C != nullptr;
2046 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2047 return C && C->getAPIntValue().ule(10);
2048}
2049
2051 EVT VT) const {
2052 // Only enable for rvv.
2053 if (!VT.isVector() || !Subtarget.hasVInstructions())
2054 return false;
2055
2056 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2057 return false;
2058
2059 return true;
2060}
2061
2063 Type *Ty) const {
2064 assert(Ty->isIntegerTy());
2065
2066 unsigned BitSize = Ty->getIntegerBitWidth();
2067 if (BitSize > Subtarget.getXLen())
2068 return false;
2069
2070 // Fast path, assume 32-bit immediates are cheap.
2071 int64_t Val = Imm.getSExtValue();
2072 if (isInt<32>(Val))
2073 return true;
2074
2075  // A constant pool entry may be more aligned than the load we're trying to
2076 // replace. If we don't support unaligned scalar mem, prefer the constant
2077 // pool.
2078 // TODO: Can the caller pass down the alignment?
2079 if (!Subtarget.enableUnalignedScalarMem())
2080 return true;
2081
2082 // Prefer to keep the load if it would require many instructions.
2083 // This uses the same threshold we use for constant pools but doesn't
2084 // check useConstantPoolForLargeInts.
2085 // TODO: Should we keep the load only when we're definitely going to emit a
2086 // constant pool?
2087
2089 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2090}
2091
2095 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2096 SelectionDAG &DAG) const {
2097 // One interesting pattern that we'd want to form is 'bit extract':
2098 // ((1 >> Y) & 1) ==/!= 0
2099 // But we also need to be careful not to try to reverse that fold.
2100
2101 // Is this '((1 >> Y) & 1)'?
2102 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2103 return false; // Keep the 'bit extract' pattern.
2104
2105 // Will this be '((1 >> Y) & 1)' after the transform?
2106 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2107 return true; // Do form the 'bit extract' pattern.
2108
2109 // If 'X' is a constant, and we transform, then we will immediately
2110 // try to undo the fold, thus causing endless combine loop.
2111 // So only do the transform if X is not a constant. This matches the default
2112 // implementation of this function.
2113 return !XC;
2114}
2115
2117 unsigned Opc = VecOp.getOpcode();
2118
2119 // Assume target opcodes can't be scalarized.
2120 // TODO - do we have any exceptions?
2121 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2122 return false;
2123
2124 // If the vector op is not supported, try to convert to scalar.
2125 EVT VecVT = VecOp.getValueType();
2126 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2127 return true;
2128
2129 // If the vector op is supported, but the scalar op is not, the transform may
2130 // not be worthwhile.
2131  // Permit a vector binary operation to be converted to a scalar binary
2132  // operation that is custom lowered with an illegal type.
2133 EVT ScalarVT = VecVT.getScalarType();
2134 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2135 isOperationCustom(Opc, ScalarVT);
2136}
2137
2139 const GlobalAddressSDNode *GA) const {
2140 // In order to maximise the opportunity for common subexpression elimination,
2141 // keep a separate ADD node for the global address offset instead of folding
2142 // it in the global address node. Later peephole optimisations may choose to
2143 // fold it back in when profitable.
2144 return false;
2145}
2146
2147// Returns 0-31 if the fli instruction is available for the type and this is
2148// legal FP immediate for the type. Returns -1 otherwise.
2150 if (!Subtarget.hasStdExtZfa())
2151 return -1;
2152
2153 bool IsSupportedVT = false;
2154 if (VT == MVT::f16) {
2155 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2156 } else if (VT == MVT::f32) {
2157 IsSupportedVT = true;
2158 } else if (VT == MVT::f64) {
2159 assert(Subtarget.hasStdExtD() && "Expect D extension");
2160 IsSupportedVT = true;
2161 }
2162
2163 if (!IsSupportedVT)
2164 return -1;
2165
2166 return RISCVLoadFPImm::getLoadFPImm(Imm);
2167}
2168
2170 bool ForCodeSize) const {
2171 bool IsLegalVT = false;
2172 if (VT == MVT::f16)
2173 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2174 else if (VT == MVT::f32)
2175 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2176 else if (VT == MVT::f64)
2177 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2178 else if (VT == MVT::bf16)
2179 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2180
2181 if (!IsLegalVT)
2182 return false;
2183
2184 if (getLegalZfaFPImm(Imm, VT) >= 0)
2185 return true;
2186
2187 // Cannot create a 64-bit floating-point immediate value for rv32.
2188 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2189 // td can handle +0.0 or -0.0 already.
2190 // -0.0 can be created by fmv + fneg.
2191 return Imm.isZero();
2192 }
2193
2194 // Special case: fmv + fneg
2195 if (Imm.isNegZero())
2196 return true;
2197
2198 // Building an integer and then converting requires a fmv at the end of
2199 // the integer sequence. The fmv is not required for Zfinx.
2200 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2201 const int Cost =
2202 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2203 Subtarget.getXLen(), Subtarget);
2204 return Cost <= FPImmCost;
2205}
2206
2207// TODO: This is very conservative.
2209 unsigned Index) const {
2211 return false;
2212
2213 // Only support extracting a fixed-length subvector from a fixed-length vector for now.
2214 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2215 return false;
2216
2217 EVT EltVT = ResVT.getVectorElementType();
2218 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2219
2220 // The smallest type we can slide is i8.
2221 // TODO: We can extract index 0 from a mask vector without a slide.
2222 if (EltVT == MVT::i1)
2223 return false;
2224
2225 unsigned ResElts = ResVT.getVectorNumElements();
2226 unsigned SrcElts = SrcVT.getVectorNumElements();
2227
2228 unsigned MinVLen = Subtarget.getRealMinVLen();
2229 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2230
2231 // If we're extracting only data from the first VLEN bits of the source
2232 // then we can always do this with an m1 vslidedown.vx. Restricting the
2233 // Index ensures we can use a vslidedown.vi.
2234 // TODO: We can generalize this when the exact VLEN is known.
2235 if (Index + ResElts <= MinVLMAX && Index < 31)
2236 return true;
2237
2238 // Conservatively only handle extracting half of a vector.
2239 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2240 // the upper half of a vector until we have more test coverage.
2241 // TODO: For sizes which aren't multiples of VLEN, this may not be
2242 // a cheap extract. However, this case is important in practice for
2243 // shuffled extracts of longer vectors. How should we resolve this?
2244 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2245}
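// --- Editorial illustration (not part of the upstream source) ---------------
// A standalone model of the two "cheap extract" cases accepted above, assuming
// a minimum VLEN of 128 and i32 elements (so MinVLMAX = 128 / 32 = 4).
static bool isCheapSubvectorExtractSketch(unsigned ResElts, unsigned SrcElts,
                                          unsigned Index, unsigned MinVLMAX) {
  // Entirely within the first VLEN bits: a single m1 vslidedown.vi suffices.
  if (Index + ResElts <= MinVLMAX && Index < 31)
    return true;
  // Otherwise only an exact half extract at index 0 or ResElts is kept cheap.
  return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
}
// e.g. extracting v2i32 at index 2 from v8i32 is cheap (2 + 2 <= 4), while the
// same extract at index 3 is not (3 + 2 > 4 and it is not a half extract).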
2246
2249 EVT VT) const {
2250 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2251 // We might still end up using a GPR but that will be decided based on ABI.
2252 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2253 !Subtarget.hasStdExtZfhminOrZhinxmin())
2254 return MVT::f32;
2255
2257
2258 return PartVT;
2259}
2260
2261unsigned
2263 std::optional<MVT> RegisterVT) const {
2264 // Pair inline assembly operand
2265 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2266 *RegisterVT == MVT::Untyped)
2267 return 1;
2268
2269 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2270}
2271
2274 EVT VT) const {
2275 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2276 // We might still end up using a GPR but that will be decided based on ABI.
2277 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2278 !Subtarget.hasStdExtZfhminOrZhinxmin())
2279 return 1;
2280
2282}
2283
2285 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2286 unsigned &NumIntermediates, MVT &RegisterVT) const {
2288 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2289
2290 return NumRegs;
2291}
2292
2293// Changes the condition code and swaps operands if necessary, so the SetCC
2294// operation matches one of the comparisons supported directly by branches
2295// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2296// with 1/-1.
2297static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2298 ISD::CondCode &CC, SelectionDAG &DAG) {
2299 // If this is a single bit test that can't be handled by ANDI, shift the
2300 // bit to be tested to the MSB and perform a signed compare with 0.
2301 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2302 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2303 isa<ConstantSDNode>(LHS.getOperand(1))) {
2304 uint64_t Mask = LHS.getConstantOperandVal(1);
2305 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2306 unsigned ShAmt = 0;
2307 if (isPowerOf2_64(Mask)) {
2308 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2309 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2310 } else {
2311 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2312 }
2313
2314 LHS = LHS.getOperand(0);
2315 if (ShAmt != 0)
2316 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2317 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2318 return;
2319 }
2320 }
2321
2322 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2323 int64_t C = RHSC->getSExtValue();
2324 switch (CC) {
2325 default: break;
2326 case ISD::SETGT:
2327 // Convert X > -1 to X >= 0.
2328 if (C == -1) {
2329 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2330 CC = ISD::SETGE;
2331 return;
2332 }
2333 break;
2334 case ISD::SETLT:
2335 // Convert X < 1 to 0 >= X.
2336 if (C == 1) {
2337 RHS = LHS;
2338 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2339 CC = ISD::SETGE;
2340 return;
2341 }
2342 break;
2343 }
2344 }
2345
2346 switch (CC) {
2347 default:
2348 break;
2349 case ISD::SETGT:
2350 case ISD::SETLE:
2351 case ISD::SETUGT:
2352 case ISD::SETULE:
2354 std::swap(LHS, RHS);
2355 break;
2356 }
2357}
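// --- Editorial illustration (not part of the upstream source) ---------------
// Scalar model of the single-bit-test rewrite above, assuming XLEN = 64. A
// mask such as (1 << 40) does not fit ANDI's signed 12-bit immediate, so the
// tested bit is shifted up to the sign bit and the equality check becomes a
// signed compare with zero, which branches handle directly.
static bool isBitClearViaSignTest(uint64_t X, unsigned BitIdx) {
  // Equivalent to (X & (uint64_t(1) << BitIdx)) == 0 for BitIdx < 64.
  return (int64_t)(X << (63 - BitIdx)) >= 0;
}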
2358
2359RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2360 if (VT.isRISCVVectorTuple()) {
2361 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2362 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2363 return RISCVII::LMUL_F8;
2364 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2365 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2366 return RISCVII::LMUL_F4;
2367 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2368 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2369 return RISCVII::LMUL_F2;
2370 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2371 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2372 return RISCVII::LMUL_1;
2373 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2374 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2375 return RISCVII::LMUL_2;
2376 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2377 return RISCVII::LMUL_4;
2378 llvm_unreachable("Invalid vector tuple type LMUL.");
2379 }
2380
2381 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2382 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2383 if (VT.getVectorElementType() == MVT::i1)
2384 KnownSize *= 8;
2385
2386 switch (KnownSize) {
2387 default:
2388 llvm_unreachable("Invalid LMUL.");
2389 case 8:
2390 return RISCVII::VLMUL::LMUL_F8;
2391 case 16:
2392 return RISCVII::VLMUL::LMUL_F4;
2393 case 32:
2394 return RISCVII::VLMUL::LMUL_F2;
2395 case 64:
2396 return RISCVII::VLMUL::LMUL_1;
2397 case 128:
2398 return RISCVII::VLMUL::LMUL_2;
2399 case 256:
2400 return RISCVII::VLMUL::LMUL_4;
2401 case 512:
2402 return RISCVII::VLMUL::LMUL_8;
2403 }
2404}
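// --- Editorial illustration (not part of the upstream source) ---------------
// The switch above keys off the known minimum size of the scalable type, where
// one LMUL=1 register covers RISCV::RVVBitsPerBlock (64) known-minimum bits:
// nxv1i32 (32 bits) -> LMUL_F2, nxv2i32 (64 bits) -> LMUL_1, and nxv8i32
// (256 bits) -> LMUL_4. i1 vectors are scaled by 8 first, so a mask type gets
// the same LMUL as an i8 vector with the same element count.
static unsigned wholeVRegsForKnownMinBits(unsigned KnownMinBits) {
  // Number of whole LMUL=1 registers a scalable type of this known minimum
  // size occupies; all fractional LMULs fit in a single register.
  return std::max(1u, KnownMinBits / 64);
}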
2405
2406static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2407 switch (LMul) {
2408 default:
2409 llvm_unreachable("Invalid LMUL.");
2410 case RISCVII::VLMUL::LMUL_F8:
2411 case RISCVII::VLMUL::LMUL_F4:
2412 case RISCVII::VLMUL::LMUL_F2:
2413 case RISCVII::VLMUL::LMUL_1:
2414 return RISCV::VRRegClassID;
2415 case RISCVII::VLMUL::LMUL_2:
2416 return RISCV::VRM2RegClassID;
2417 case RISCVII::VLMUL::LMUL_4:
2418 return RISCV::VRM4RegClassID;
2419 case RISCVII::VLMUL::LMUL_8:
2420 return RISCV::VRM8RegClassID;
2421 }
2422}
2423
2424unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2425 RISCVII::VLMUL LMUL = getLMUL(VT);
2426 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2427 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2428 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2429 LMUL == RISCVII::VLMUL::LMUL_1) {
2430 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2431 "Unexpected subreg numbering");
2432 return RISCV::sub_vrm1_0 + Index;
2433 }
2434 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2435 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2436 "Unexpected subreg numbering");
2437 return RISCV::sub_vrm2_0 + Index;
2438 }
2439 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2440 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2441 "Unexpected subreg numbering");
2442 return RISCV::sub_vrm4_0 + Index;
2443 }
2444 llvm_unreachable("Invalid vector type.");
2445}
2446
2448 if (VT.isRISCVVectorTuple()) {
2449 unsigned NF = VT.getRISCVVectorTupleNumFields();
2450 unsigned RegsPerField =
2451 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2452 (NF * RISCV::RVVBitsPerBlock));
2453 switch (RegsPerField) {
2454 case 1:
2455 if (NF == 2)
2456 return RISCV::VRN2M1RegClassID;
2457 if (NF == 3)
2458 return RISCV::VRN3M1RegClassID;
2459 if (NF == 4)
2460 return RISCV::VRN4M1RegClassID;
2461 if (NF == 5)
2462 return RISCV::VRN5M1RegClassID;
2463 if (NF == 6)
2464 return RISCV::VRN6M1RegClassID;
2465 if (NF == 7)
2466 return RISCV::VRN7M1RegClassID;
2467 if (NF == 8)
2468 return RISCV::VRN8M1RegClassID;
2469 break;
2470 case 2:
2471 if (NF == 2)
2472 return RISCV::VRN2M2RegClassID;
2473 if (NF == 3)
2474 return RISCV::VRN3M2RegClassID;
2475 if (NF == 4)
2476 return RISCV::VRN4M2RegClassID;
2477 break;
2478 case 4:
2479 assert(NF == 2);
2480 return RISCV::VRN2M4RegClassID;
2481 default:
2482 break;
2483 }
2484 llvm_unreachable("Invalid vector tuple type RegClass.");
2485 }
2486
2487 if (VT.getVectorElementType() == MVT::i1)
2488 return RISCV::VRRegClassID;
2489 return getRegClassIDForLMUL(getLMUL(VT));
2490}
2491
2492// Attempt to decompose a subvector insert/extract between VecVT and
2493// SubVecVT via subregister indices. Returns the subregister index that
2494// can perform the subvector insert/extract with the given element index, as
2495// well as the index corresponding to any leftover subvectors that must be
2496// further inserted/extracted within the register class for SubVecVT.
2497std::pair<unsigned, unsigned>
2499 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2500 const RISCVRegisterInfo *TRI) {
2501 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2502 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2503 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2504 "Register classes not ordered");
2505 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2506 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2507
2508 // If VecVT is a vector tuple type, either it has the same RegClass as
2509 // SubVecVT, or SubVecVT is actually a subvector of VecVT.
2510 if (VecVT.isRISCVVectorTuple()) {
2511 if (VecRegClassID == SubRegClassID)
2512 return {RISCV::NoSubRegister, 0};
2513
2514 assert(SubVecVT.isScalableVector() &&
2515 "Only allow scalable vector subvector.");
2516 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2517 "Invalid vector tuple insert/extract for vector and subvector with "
2518 "different LMUL.");
2519 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2520 }
2521
2522 // Try to compose a subregister index that takes us from the incoming
2523 // LMUL>1 register class down to the outgoing one. At each step we halve
2524 // the LMUL:
2525 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2526 // Note that this is not guaranteed to find a subregister index, such as
2527 // when we are extracting from one VR type to another.
2528 unsigned SubRegIdx = RISCV::NoSubRegister;
2529 for (const unsigned RCID :
2530 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2531 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2532 VecVT = VecVT.getHalfNumVectorElementsVT();
2533 bool IsHi =
2534 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2535 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2536 getSubregIndexByMVT(VecVT, IsHi));
2537 if (IsHi)
2538 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2539 }
2540 return {SubRegIdx, InsertExtractIdx};
2541}
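// --- Editorial illustration (not part of the upstream source) ---------------
// A purely numeric walk of the comment's example above, extracting nxv2i32
// (one vreg) at element index 12 from nxv16i32 (eight vregs):
//   halve to nxv8i32:  12 >= 8 -> high half (sub_vrm4_1), remaining index 4
//   halve to nxv4i32:   4 >= 4 -> high half (sub_vrm2_1), remaining index 0
//   halve to nxv2i32:   0 <  2 -> low half  (sub_vrm1_0), remaining index 0
// which composes to sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 with leftover
// index 0. One halving step in isolation:
static std::pair<bool, unsigned> halveIndexSketch(unsigned HalfNumElts,
                                                  unsigned Idx) {
  bool IsHi = Idx >= HalfNumElts;
  return {IsHi, IsHi ? Idx - HalfNumElts : Idx};
}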
2542
2543// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2544// stores for those types.
2545bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2546 return !Subtarget.useRVVForFixedLengthVectors() ||
2547 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2548}
2549
2551 if (!ScalarTy.isSimple())
2552 return false;
2553 switch (ScalarTy.getSimpleVT().SimpleTy) {
2554 case MVT::iPTR:
2555 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2556 case MVT::i8:
2557 case MVT::i16:
2558 case MVT::i32:
2559 return true;
2560 case MVT::i64:
2561 return Subtarget.hasVInstructionsI64();
2562 case MVT::f16:
2563 return Subtarget.hasVInstructionsF16Minimal();
2564 case MVT::bf16:
2565 return Subtarget.hasVInstructionsBF16Minimal();
2566 case MVT::f32:
2567 return Subtarget.hasVInstructionsF32();
2568 case MVT::f64:
2569 return Subtarget.hasVInstructionsF64();
2570 default:
2571 return false;
2572 }
2573}
2574
2575
2576unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2577 return NumRepeatedDivisors;
2578}
2579
2581 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2582 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2583 "Unexpected opcode");
2584 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2585 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2587 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2588 if (!II)
2589 return SDValue();
2590 return Op.getOperand(II->VLOperand + 1 + HasChain);
2591}
2592
2594 const RISCVSubtarget &Subtarget) {
2595 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2596 if (!Subtarget.useRVVForFixedLengthVectors())
2597 return false;
2598
2599 // We only support a set of vector types with a consistent maximum fixed size
2600 // across all supported vector element types to avoid legalization issues.
2601 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2602 // fixed-length vector type we support is 1024 bytes.
2603 if (VT.getFixedSizeInBits() > 1024 * 8)
2604 return false;
2605
2606 unsigned MinVLen = Subtarget.getRealMinVLen();
2607
2608 MVT EltVT = VT.getVectorElementType();
2609
2610 // Don't use RVV for vectors we cannot scalarize if required.
2611 switch (EltVT.SimpleTy) {
2612 // i1 is supported but has different rules.
2613 default:
2614 return false;
2615 case MVT::i1:
2616 // Masks can only use a single register.
2617 if (VT.getVectorNumElements() > MinVLen)
2618 return false;
2619 MinVLen /= 8;
2620 break;
2621 case MVT::i8:
2622 case MVT::i16:
2623 case MVT::i32:
2624 break;
2625 case MVT::i64:
2626 if (!Subtarget.hasVInstructionsI64())
2627 return false;
2628 break;
2629 case MVT::f16:
2630 if (!Subtarget.hasVInstructionsF16Minimal())
2631 return false;
2632 break;
2633 case MVT::bf16:
2634 if (!Subtarget.hasVInstructionsBF16Minimal())
2635 return false;
2636 break;
2637 case MVT::f32:
2638 if (!Subtarget.hasVInstructionsF32())
2639 return false;
2640 break;
2641 case MVT::f64:
2642 if (!Subtarget.hasVInstructionsF64())
2643 return false;
2644 break;
2645 }
2646
2647 // Reject elements larger than ELEN.
2648 if (EltVT.getSizeInBits() > Subtarget.getELen())
2649 return false;
2650
2651 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2652 // Don't use RVV for types that don't fit.
2653 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2654 return false;
2655
2656 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2657 // the base fixed length RVV support in place.
2658 if (!VT.isPow2VectorType())
2659 return false;
2660
2661 return true;
2662}
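// --- Editorial illustration (not part of the upstream source) ---------------
// The register-count check above is just "how many minimum-VLEN registers does
// the fixed vector need". With a minimum VLEN of 128, a v16i32 (512 bits)
// needs divideCeil(512, 128) = 4 registers, so it is only accepted when the
// fixed-length-vector LMUL limit is at least 4.
static unsigned vregsNeededForFixedVector(unsigned FixedSizeInBits,
                                          unsigned MinVLen) {
  return divideCeil(FixedSizeInBits, MinVLen);
}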
2663
2664bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2665 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2666}
2667
2668// Return the largest legal scalable vector type that matches VT's element type.
2670 const RISCVSubtarget &Subtarget) {
2671 // This may be called before legal types are setup.
2672 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2673 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2674 "Expected legal fixed length vector!");
2675
2676 unsigned MinVLen = Subtarget.getRealMinVLen();
2677 unsigned MaxELen = Subtarget.getELen();
2678
2679 MVT EltVT = VT.getVectorElementType();
2680 switch (EltVT.SimpleTy) {
2681 default:
2682 llvm_unreachable("unexpected element type for RVV container");
2683 case MVT::i1:
2684 case MVT::i8:
2685 case MVT::i16:
2686 case MVT::i32:
2687 case MVT::i64:
2688 case MVT::bf16:
2689 case MVT::f16:
2690 case MVT::f32:
2691 case MVT::f64: {
2692 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2693 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2694 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2695 unsigned NumElts =
2696 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2697 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2698 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2699 return MVT::getScalableVectorVT(EltVT, NumElts);
2700 }
2701 }
2702}
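// --- Editorial illustration (not part of the upstream source) ---------------
// Worked example of the container choice above, assuming a minimum VLEN of
// 128 (vscale_min = 2): a fixed v4i32 (128 bits) maps to nxv2i32, which is
// exactly one LMUL=1 register, while the narrower v4i16 maps to nxv2i16 and
// therefore to a fractional LMUL of 1/2. The std::max clamp keeps at least
// RVVBitsPerBlock / ELEN elements so SEW never exceeds LMUL * ELEN.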
2703
2705 const RISCVSubtarget &Subtarget) {
2707 Subtarget);
2708}
2709
2711 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2712}
2713
2714// Grow V to consume an entire RVV register.
2716 const RISCVSubtarget &Subtarget) {
2717 assert(VT.isScalableVector() &&
2718 "Expected to convert into a scalable vector!");
2719 assert(V.getValueType().isFixedLengthVector() &&
2720 "Expected a fixed length vector operand!");
2721 SDLoc DL(V);
2722 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2723 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2724}
2725
2726// Shrink V so it's just big enough to maintain a VT's worth of data.
2728 const RISCVSubtarget &Subtarget) {
2730 "Expected to convert into a fixed length vector!");
2731 assert(V.getValueType().isScalableVector() &&
2732 "Expected a scalable vector operand!");
2733 SDLoc DL(V);
2734 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2735 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2736}
2737
2738/// Return the mask type suitable for masking the provided vector type.
2739/// This is simply an i1 element type vector of the same (possibly scalable)
2740/// length.
2741static MVT getMaskTypeFor(MVT VecVT) {
2742 assert(VecVT.isVector());
2744 return MVT::getVectorVT(MVT::i1, EC);
2745}
2746
2747/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2748/// vector length VL.
2749static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2750 SelectionDAG &DAG) {
2751 MVT MaskVT = getMaskTypeFor(VecVT);
2752 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2753}
2754
2755static std::pair<SDValue, SDValue>
2757 const RISCVSubtarget &Subtarget) {
2758 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2759 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2760 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2761 return {Mask, VL};
2762}
2763
2764static std::pair<SDValue, SDValue>
2765getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2766 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2767 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2768 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2769 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2770 return {Mask, VL};
2771}
2772
2773// Gets the two common "VL" operands: an all-ones mask and the vector length.
2774// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2775// the vector type that the fixed-length vector is contained in. Otherwise if
2776// VecVT is scalable, then ContainerVT should be the same as VecVT.
2777static std::pair<SDValue, SDValue>
2778getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2779 const RISCVSubtarget &Subtarget) {
2780 if (VecVT.isFixedLengthVector())
2781 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2782 Subtarget);
2783 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2784 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2785}
2786
2788 SelectionDAG &DAG) const {
2789 assert(VecVT.isScalableVector() && "Expected scalable vector");
2790 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2791 VecVT.getVectorElementCount());
2792}
2793
2794std::pair<unsigned, unsigned>
2796 const RISCVSubtarget &Subtarget) {
2797 assert(VecVT.isScalableVector() && "Expected scalable vector");
2798
2799 unsigned EltSize = VecVT.getScalarSizeInBits();
2800 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2801
2802 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2803 unsigned MaxVLMAX =
2804 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2805
2806 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2807 unsigned MinVLMAX =
2808 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2809
2810 return std::make_pair(MinVLMAX, MaxVLMAX);
2811}
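// --- Editorial illustration (not part of the upstream source) ---------------
// VLMAX for a scalable type is (VLEN / SEW) * LMUL. A sketch assuming a
// non-fractional LMUL, with the LMUL derived from the type's known minimum
// size in bits divided by RVVBitsPerBlock (64):
static unsigned vlmaxSketch(unsigned VLen, unsigned SEW, unsigned MinSizeBits) {
  return (VLen / SEW) * (MinSizeBits / 64);
}
// e.g. nxv4i32 (SEW = 32, minimum 128 bits, LMUL = 2) on a core with VLEN
// between 128 and 512 gives MinVLMAX = 8 and MaxVLMAX = 32.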
2812
2813// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2814// cases of either are (currently) supported. This can get us into an infinite loop
2815// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2816// as a ..., etc.
2817// Until either (or both) of these can reliably lower any node, reporting that
2818// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2819// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2820// which is not desirable.
2822 EVT VT, unsigned DefinedValues) const {
2823 return false;
2824}
2825
2827 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2828 // implementation-defined.
2829 if (!VT.isVector())
2831 unsigned DLenFactor = Subtarget.getDLenFactor();
2832 unsigned Cost;
2833 if (VT.isScalableVector()) {
2834 unsigned LMul;
2835 bool Fractional;
2836 std::tie(LMul, Fractional) =
2838 if (Fractional)
2839 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2840 else
2841 Cost = (LMul * DLenFactor);
2842 } else {
2843 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2844 }
2845 return Cost;
2846}
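// --- Editorial illustration (not part of the upstream source) ---------------
// A standalone restatement of the whole-register cost model above: integer
// LMULs scale linearly with DLenFactor (DLenFactor = 2 models DLEN = VLEN/2),
// while fractional LMULs bottom out at 1.
static unsigned lmulCostSketch(unsigned LMulNumOrDenom, bool Fractional,
                               unsigned DLenFactor) {
  if (Fractional)
    return LMulNumOrDenom <= DLenFactor ? DLenFactor / LMulNumOrDenom : 1;
  return LMulNumOrDenom * DLenFactor;
}
// e.g. with DLEN = VLEN/2: m4 costs 8, m1 costs 2, and mf2 costs 1.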
2847
2848
2849/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2850/// is generally quadratic in the number of vregs implied by LMUL. Note that
2851/// the operands (index and possibly mask) are handled separately.
2853 return getLMULCost(VT) * getLMULCost(VT);
2854}
2855
2856/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2857/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2858/// or may track the vrgather.vv cost. It is implementation-dependent.
2860 return getLMULCost(VT);
2861}
2862
2863/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2864/// for the type VT. (This does not cover the vslide1up or vslide1down
2865/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2866/// or may track the vrgather.vv cost. It is implementation-dependent.
2868 return getLMULCost(VT);
2869}
2870
2871/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2872/// for the type VT. (This does not cover the vslide1up or vslide1down
2873/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2874/// or may track the vrgather.vv cost. It is implementation-dependent.
2876 return getLMULCost(VT);
2877}
2878
2880 const RISCVSubtarget &Subtarget) {
2881 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2882 // bf16 conversions are always promoted to f32.
2883 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2884 Op.getValueType() == MVT::bf16) {
2885 bool IsStrict = Op->isStrictFPOpcode();
2886
2887 SDLoc DL(Op);
2888 if (IsStrict) {
2889 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2890 {Op.getOperand(0), Op.getOperand(1)});
2891 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2892 {Op.getValueType(), MVT::Other},
2893 {Val.getValue(1), Val.getValue(0),
2894 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2895 }
2896 return DAG.getNode(
2897 ISD::FP_ROUND, DL, Op.getValueType(),
2898 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2899 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2900 }
2901
2902 // Other operations are legal.
2903 return Op;
2904}
2905
2907 const RISCVSubtarget &Subtarget) {
2908 // RISC-V FP-to-int conversions saturate to the destination register size, but
2909 // don't produce 0 for nan. We can use a conversion instruction and fix the
2910 // nan case with a compare and a select.
2911 SDValue Src = Op.getOperand(0);
2912
2913 MVT DstVT = Op.getSimpleValueType();
2914 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2915
2916 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2917
2918 if (!DstVT.isVector()) {
2919 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2920 // the result.
2921 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2922 Src.getValueType() == MVT::bf16) {
2923 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2924 }
2925
2926 unsigned Opc;
2927 if (SatVT == DstVT)
2928 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2929 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2931 else
2932 return SDValue();
2933 // FIXME: Support other SatVTs by clamping before or after the conversion.
2934
2935 SDLoc DL(Op);
2936 SDValue FpToInt = DAG.getNode(
2937 Opc, DL, DstVT, Src,
2939
2940 if (Opc == RISCVISD::FCVT_WU_RV64)
2941 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2942
2943 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2944 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2946 }
2947
2948 // Vectors.
2949
2950 MVT DstEltVT = DstVT.getVectorElementType();
2951 MVT SrcVT = Src.getSimpleValueType();
2952 MVT SrcEltVT = SrcVT.getVectorElementType();
2953 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2954 unsigned DstEltSize = DstEltVT.getSizeInBits();
2955
2956 // Only handle saturating to the destination type.
2957 if (SatVT != DstEltVT)
2958 return SDValue();
2959
2960 MVT DstContainerVT = DstVT;
2961 MVT SrcContainerVT = SrcVT;
2962 if (DstVT.isFixedLengthVector()) {
2963 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2964 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2965 assert(DstContainerVT.getVectorElementCount() ==
2966 SrcContainerVT.getVectorElementCount() &&
2967 "Expected same element count");
2968 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2969 }
2970
2971 SDLoc DL(Op);
2972
2973 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2974
2975 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2976 {Src, Src, DAG.getCondCode(ISD::SETNE),
2977 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2978
2979 // If we need to widen by more than one step, first promote the FP type, then
2980 // do a widening convert.
2981 if (DstEltSize > (2 * SrcEltSize)) {
2982 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2983 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2984 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2985 }
2986
2987 MVT CvtContainerVT = DstContainerVT;
2988 MVT CvtEltVT = DstEltVT;
2989 if (SrcEltSize > (2 * DstEltSize)) {
2990 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2991 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2992 }
2993
2994 unsigned RVVOpc =
2996 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2997
2998 while (CvtContainerVT != DstContainerVT) {
2999 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3000 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3001 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3002 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3004 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3005 }
3006
3007 SDValue SplatZero = DAG.getNode(
3008 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3009 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3010 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3011 Res, DAG.getUNDEF(DstContainerVT), VL);
3012
3013 if (DstVT.isFixedLengthVector())
3014 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3015
3016 return Res;
3017}
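// --- Editorial illustration (not part of the upstream source) ---------------
// C-level model of the scalar saturating-convert pattern emitted above,
// assuming f32 -> i32: fcvt.w.s already clamps out-of-range inputs to
// INT32_MIN/INT32_MAX, so only the NaN case needs the compare-and-select.
static int32_t fpToSint32SatSketch(float X) {
  if (X != X)                 // NaN: FP_TO_SINT_SAT requires 0.
    return 0;
  if (X >= 2147483648.0f)     // Clamp, mirroring fcvt's saturation.
    return INT32_MAX;
  if (X < -2147483648.0f)
    return INT32_MIN;
  return (int32_t)X;
}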
3018
3020 const RISCVSubtarget &Subtarget) {
3021 bool IsStrict = Op->isStrictFPOpcode();
3022 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3023
3024 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3025 // bf16 conversions are always promoted to f32.
3026 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3027 SrcVal.getValueType() == MVT::bf16) {
3028 SDLoc DL(Op);
3029 if (IsStrict) {
3030 SDValue Ext =
3031 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3032 {Op.getOperand(0), SrcVal});
3033 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3034 {Ext.getValue(1), Ext.getValue(0)});
3035 }
3036 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3037 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3038 }
3039
3040 // Other operations are legal.
3041 return Op;
3042}
3043
3045 switch (Opc) {
3046 case ISD::FROUNDEVEN:
3048 case ISD::VP_FROUNDEVEN:
3049 return RISCVFPRndMode::RNE;
3050 case ISD::FTRUNC:
3051 case ISD::STRICT_FTRUNC:
3052 case ISD::VP_FROUNDTOZERO:
3053 return RISCVFPRndMode::RTZ;
3054 case ISD::FFLOOR:
3055 case ISD::STRICT_FFLOOR:
3056 case ISD::VP_FFLOOR:
3057 return RISCVFPRndMode::RDN;
3058 case ISD::FCEIL:
3059 case ISD::STRICT_FCEIL:
3060 case ISD::VP_FCEIL:
3061 return RISCVFPRndMode::RUP;
3062 case ISD::FROUND:
3063 case ISD::STRICT_FROUND:
3064 case ISD::VP_FROUND:
3065 return RISCVFPRndMode::RMM;
3066 case ISD::FRINT:
3067 case ISD::VP_FRINT:
3068 return RISCVFPRndMode::DYN;
3069 }
3070
3072}
3073
3074// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3075// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3076// the integer domain and back. Taking care to avoid converting values that are
3077// nan or already correct.
3078static SDValue
3080 const RISCVSubtarget &Subtarget) {
3081 MVT VT = Op.getSimpleValueType();
3082 assert(VT.isVector() && "Unexpected type");
3083
3084 SDLoc DL(Op);
3085
3086 SDValue Src = Op.getOperand(0);
3087
3088 MVT ContainerVT = VT;
3089 if (VT.isFixedLengthVector()) {
3090 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3091 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3092 }
3093
3094 SDValue Mask, VL;
3095 if (Op->isVPOpcode()) {
3096 Mask = Op.getOperand(1);
3097 if (VT.isFixedLengthVector())
3098 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3099 Subtarget);
3100 VL = Op.getOperand(2);
3101 } else {
3102 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3103 }
3104
3105 // Freeze the source since we are increasing the number of uses.
3106 Src = DAG.getFreeze(Src);
3107
3108 // We do the conversion on the absolute value and fix the sign at the end.
3109 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3110
3111 // Determine the largest integer that can be represented exactly. This and
3112 // values larger than it don't have any fractional bits so don't need to
3113 // be converted.
3114 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3115 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3116 APFloat MaxVal = APFloat(FltSem);
3117 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3118 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3119 SDValue MaxValNode =
3120 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3121 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3122 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3123
3124 // If abs(Src) was larger than MaxVal or nan, keep it.
3125 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3126 Mask =
3127 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3128 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3129 Mask, Mask, VL});
3130
3131 // Truncate to integer and convert back to FP.
3132 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3133 MVT XLenVT = Subtarget.getXLenVT();
3134 SDValue Truncated;
3135
3136 switch (Op.getOpcode()) {
3137 default:
3138 llvm_unreachable("Unexpected opcode");
3139 case ISD::FRINT:
3140 case ISD::VP_FRINT:
3141 case ISD::FCEIL:
3142 case ISD::VP_FCEIL:
3143 case ISD::FFLOOR:
3144 case ISD::VP_FFLOOR:
3145 case ISD::FROUND:
3146 case ISD::FROUNDEVEN:
3147 case ISD::VP_FROUND:
3148 case ISD::VP_FROUNDEVEN:
3149 case ISD::VP_FROUNDTOZERO: {
3152 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3153 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3154 break;
3155 }
3156 case ISD::FTRUNC:
3157 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3158 Mask, VL);
3159 break;
3160 case ISD::FNEARBYINT:
3161 case ISD::VP_FNEARBYINT:
3162 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3163 Mask, VL);
3164 break;
3165 }
3166
3167 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3168 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3169 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3170 Mask, VL);
3171
3172 // Restore the original sign so that -0.0 is preserved.
3173 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3174 Src, Src, Mask, VL);
3175
3176 if (!VT.isFixedLengthVector())
3177 return Truncated;
3178
3179 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3180}
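// --- Editorial illustration (not part of the upstream source) ---------------
// The "largest exactly representable integer" threshold above is 2^(p-1) for a
// format with p bits of precision: 2^23 = 8388608 for f32 and 2^52 for f64.
// Anything with |x| >= 2^(p-1) (and NaN, which fails the ordered compare) is
// left untouched by the integer round trip, and -0.0 survives via the final
// copysign.
static bool mayNeedRoundingSketch(double X, int Precision) {
  // 2^(p-1); values at or above it already have no fractional bits.
  double Threshold = std::ldexp(1.0, Precision - 1);
  return std::fabs(X) < Threshold;   // NaN compares false, so it is excluded
}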
3181
3182// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3183// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
3184// to qNaN and converting the new source to integer and back to FP.
3185static SDValue
3187 const RISCVSubtarget &Subtarget) {
3188 SDLoc DL(Op);
3189 MVT VT = Op.getSimpleValueType();
3190 SDValue Chain = Op.getOperand(0);
3191 SDValue Src = Op.getOperand(1);
3192
3193 MVT ContainerVT = VT;
3194 if (VT.isFixedLengthVector()) {
3195 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3196 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3197 }
3198
3199 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3200
3201 // Freeze the source since we are increasing the number of uses.
3202 Src = DAG.getFreeze(Src);
3203
3204 // Convert sNaN to qNaN by executing x + x for every unordered element x in Src.
3205 MVT MaskVT = Mask.getSimpleValueType();
3207 DAG.getVTList(MaskVT, MVT::Other),
3208 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3209 DAG.getUNDEF(MaskVT), Mask, VL});
3210 Chain = Unorder.getValue(1);
3212 DAG.getVTList(ContainerVT, MVT::Other),
3213 {Chain, Src, Src, Src, Unorder, VL});
3214 Chain = Src.getValue(1);
3215
3216 // We do the conversion on the absolute value and fix the sign at the end.
3217 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3218
3219 // Determine the largest integer that can be represented exactly. This and
3220 // values larger than it don't have any fractional bits so don't need to
3221 // be converted.
3222 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3223 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3224 APFloat MaxVal = APFloat(FltSem);
3225 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3226 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3227 SDValue MaxValNode =
3228 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3229 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3230 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3231
3232 // If abs(Src) was larger than MaxVal or nan, keep it.
3233 Mask = DAG.getNode(
3234 RISCVISD::SETCC_VL, DL, MaskVT,
3235 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3236
3237 // Truncate to integer and convert back to FP.
3238 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3239 MVT XLenVT = Subtarget.getXLenVT();
3240 SDValue Truncated;
3241
3242 switch (Op.getOpcode()) {
3243 default:
3244 llvm_unreachable("Unexpected opcode");
3245 case ISD::STRICT_FCEIL:
3246 case ISD::STRICT_FFLOOR:
3247 case ISD::STRICT_FROUND:
3251 Truncated = DAG.getNode(
3252 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3253 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3254 break;
3255 }
3256 case ISD::STRICT_FTRUNC:
3257 Truncated =
3259 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3260 break;
3263 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3264 Mask, VL);
3265 break;
3266 }
3267 Chain = Truncated.getValue(1);
3268
3269 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3270 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3271 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3272 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3273 Truncated, Mask, VL);
3274 Chain = Truncated.getValue(1);
3275 }
3276
3277 // Restore the original sign so that -0.0 is preserved.
3278 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3279 Src, Src, Mask, VL);
3280
3281 if (VT.isFixedLengthVector())
3282 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3283 return DAG.getMergeValues({Truncated, Chain}, DL);
3284}
3285
3286static SDValue
3288 const RISCVSubtarget &Subtarget) {
3289 MVT VT = Op.getSimpleValueType();
3290 if (VT.isVector())
3291 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3292
3293 if (DAG.shouldOptForSize())
3294 return SDValue();
3295
3296 SDLoc DL(Op);
3297 SDValue Src = Op.getOperand(0);
3298
3299 // Create an integer the size of the mantissa with the MSB set. This and all
3300 // values larger than it don't have any fractional bits so don't need to be
3301 // converted.
3302 const fltSemantics &FltSem = VT.getFltSemantics();
3303 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3304 APFloat MaxVal = APFloat(FltSem);
3305 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3306 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3307 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3308
3310 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3311 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3312}
3313
3314// Expand vector LRINT and LLRINT by converting to the integer domain.
3316 const RISCVSubtarget &Subtarget) {
3317 MVT VT = Op.getSimpleValueType();
3318 assert(VT.isVector() && "Unexpected type");
3319
3320 SDLoc DL(Op);
3321 SDValue Src = Op.getOperand(0);
3322 MVT ContainerVT = VT;
3323
3324 if (VT.isFixedLengthVector()) {
3325 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3326 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3327 }
3328
3329 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3330 SDValue Truncated = DAG.getNode(
3331 RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
3333 VL);
3334
3335 if (!VT.isFixedLengthVector())
3336 return Truncated;
3337
3338 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3339}
3340
3341static SDValue
3343 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3344 SDValue Offset, SDValue Mask, SDValue VL,
3346 if (Passthru.isUndef())
3348 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3349 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3350 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3351}
3352
3353static SDValue
3354getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3355 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3356 SDValue VL,
3358 if (Passthru.isUndef())
3360 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3361 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3362 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3363}
3364
3365static MVT getLMUL1VT(MVT VT) {
3367 "Unexpected vector MVT");
3371}
3372
3373struct VIDSequence {
3374 int64_t StepNumerator;
3375 unsigned StepDenominator;
3376 int64_t Addend;
3377};
3378
3379static std::optional<APInt> getExactInteger(const APFloat &APF,
3381 // We will use a SINT_TO_FP to materialize this constant so we should use a
3382 // signed APSInt here.
3383 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3384 // We use an arbitrary rounding mode here. If a floating-point value is an
3385 // exact integer (e.g., 1.0), the rounding mode does not affect the output. If
3386 // the rounding mode changes the output value, then it is not an exact
3387 // integer.
3389 bool IsExact;
3390 // If it is out of signed integer range, it will return an invalid operation.
3391 // If it is not an exact integer, IsExact is false.
3392 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3394 !IsExact)
3395 return std::nullopt;
3396 return ValInt.extractBits(BitWidth, 0);
3397}
3398
3399// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3400 // to the (non-zero) step S and start value X. This can then be lowered as the
3401// RVV sequence (VID * S) + X, for example.
3402// The step S is represented as an integer numerator divided by a positive
3403// denominator. Note that the implementation currently only identifies
3404// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3405// cannot detect 2/3, for example.
3406// Note that this method will also match potentially unappealing index
3407// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3408// determine whether this is worth generating code for.
3409//
3410// EltSizeInBits is the size of the type that the sequence will be calculated
3411// in, i.e. SEW for build_vectors or XLEN for address calculations.
3412static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3413 unsigned EltSizeInBits) {
3414 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3415 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3416 return std::nullopt;
3417 bool IsInteger = Op.getValueType().isInteger();
3418
3419 std::optional<unsigned> SeqStepDenom;
3420 std::optional<APInt> SeqStepNum;
3421 std::optional<APInt> SeqAddend;
3422 std::optional<std::pair<APInt, unsigned>> PrevElt;
3423 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3424
3425 // First extract the ops into a list of constant integer values. This may not
3426 // be possible for floats if they're not all representable as integers.
3428 const unsigned OpSize = Op.getScalarValueSizeInBits();
3429 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3430 if (Elt.isUndef()) {
3431 Elts[Idx] = std::nullopt;
3432 continue;
3433 }
3434 if (IsInteger) {
3435 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3436 } else {
3437 auto ExactInteger =
3438 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3439 if (!ExactInteger)
3440 return std::nullopt;
3441 Elts[Idx] = *ExactInteger;
3442 }
3443 }
3444
3445 for (auto [Idx, Elt] : enumerate(Elts)) {
3446 // Assume undef elements match the sequence; we just have to be careful
3447 // when interpolating across them.
3448 if (!Elt)
3449 continue;
3450
3451 if (PrevElt) {
3452 // Calculate the step since the last non-undef element, and ensure
3453 // it's consistent across the entire sequence.
3454 unsigned IdxDiff = Idx - PrevElt->second;
3455 APInt ValDiff = *Elt - PrevElt->first;
3456
3457 // A zero value difference means that we're somewhere in the middle
3458 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3459 // step change before evaluating the sequence.
3460 if (ValDiff == 0)
3461 continue;
3462
3463 int64_t Remainder = ValDiff.srem(IdxDiff);
3464 // Normalize the step if it's greater than 1.
3465 if (Remainder != ValDiff.getSExtValue()) {
3466 // The difference must cleanly divide the element span.
3467 if (Remainder != 0)
3468 return std::nullopt;
3469 ValDiff = ValDiff.sdiv(IdxDiff);
3470 IdxDiff = 1;
3471 }
3472
3473 if (!SeqStepNum)
3474 SeqStepNum = ValDiff;
3475 else if (ValDiff != SeqStepNum)
3476 return std::nullopt;
3477
3478 if (!SeqStepDenom)
3479 SeqStepDenom = IdxDiff;
3480 else if (IdxDiff != *SeqStepDenom)
3481 return std::nullopt;
3482 }
3483
3484 // Record this non-undef element for later.
3485 if (!PrevElt || PrevElt->first != *Elt)
3486 PrevElt = std::make_pair(*Elt, Idx);
3487 }
3488
3489 // We need to have logged a step for this to count as a legal index sequence.
3490 if (!SeqStepNum || !SeqStepDenom)
3491 return std::nullopt;
3492
3493 // Loop back through the sequence and validate elements we might have skipped
3494 // while waiting for a valid step. While doing this, log any sequence addend.
3495 for (auto [Idx, Elt] : enumerate(Elts)) {
3496 if (!Elt)
3497 continue;
3498 APInt ExpectedVal =
3499 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3500 *SeqStepNum)
3501 .sdiv(*SeqStepDenom);
3502
3503 APInt Addend = *Elt - ExpectedVal;
3504 if (!SeqAddend)
3505 SeqAddend = Addend;
3506 else if (Addend != SeqAddend)
3507 return std::nullopt;
3508 }
3509
3510 assert(SeqAddend && "Must have an addend if we have a step");
3511
3512 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3513 SeqAddend->getSExtValue()};
3514}
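// --- Editorial illustration (not part of the upstream source) ---------------
// Sequences the matcher above accepts satisfy
//   Elt[i] == (i * StepNumerator) / StepDenominator + Addend  (signed division)
// for every defined element, for example:
//   <0, 2, 4, 6>    -> {StepNumerator = 2,  StepDenominator = 1, Addend = 0}
//   <5, 4, 3, 2>    -> {StepNumerator = -1, StepDenominator = 1, Addend = 5}
//   <0, 0, 1, 1, 2> -> {StepNumerator = 1,  StepDenominator = 2, Addend = 0}
// while <0, 1, 3, ...> is rejected because no single step fits all elements.
static int64_t vidElementSketch(unsigned Idx, int64_t Num, unsigned Denom,
                                int64_t Addend) {
  return (int64_t(Idx) * Num) / int64_t(Denom) + Addend;
}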
3515
3516// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3517// and lower it as a VRGATHER_VX_VL from the source vector.
3518static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3519 SelectionDAG &DAG,
3520 const RISCVSubtarget &Subtarget) {
3521 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3522 return SDValue();
3523 SDValue Src = SplatVal.getOperand(0);
3524 // Don't perform this optimization for i1 vectors, or if the element types
3525 // are different.
3526 // FIXME: Support i1 vectors, maybe by promoting to i8?
3527 MVT EltTy = VT.getVectorElementType();
3528 MVT SrcVT = Src.getSimpleValueType();
3529 if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType())
3530 return SDValue();
3531 SDValue Idx = SplatVal.getOperand(1);
3532 // The index must be a legal type.
3533 if (Idx.getValueType() != Subtarget.getXLenVT())
3534 return SDValue();
3535
3536 // Check that we know Idx lies within VT
3537 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3538 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3539 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3540 return SDValue();
3541 }
3542
3543 // Convert fixed length vectors to scalable
3544 MVT ContainerVT = VT;
3545 if (VT.isFixedLengthVector())
3546 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3547
3548 MVT SrcContainerVT = SrcVT;
3549 if (SrcVT.isFixedLengthVector()) {
3550 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3551 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3552 }
3553
3554 // Put Src in a VT-sized vector.
3555 if (SrcContainerVT.getVectorMinNumElements() <
3556 ContainerVT.getVectorMinNumElements())
3557 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3558 DAG.getUNDEF(ContainerVT), Src,
3559 DAG.getVectorIdxConstant(0, DL));
3560 else
3561 Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Src,
3562 DAG.getVectorIdxConstant(0, DL));
3563
3564 // We checked that Idx fits inside VT earlier
3565 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3566 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3567 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3568 if (VT.isFixedLengthVector())
3569 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3570 return Gather;
3571}
3572
3573/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3574/// which constitute a large proportion of the elements. In such cases we can
3575/// splat a vector with the dominant element and make up the shortfall with
3576/// INSERT_VECTOR_ELTs. Returns a null SDValue if not profitable.
3577/// Note that this includes vectors of 2 elements by association. The
3578/// upper-most element is the "dominant" one, allowing us to use a splat to
3579/// "insert" the upper element, and an insert of the lower element at position
3580/// 0, which improves codegen.
3582 const RISCVSubtarget &Subtarget) {
3583 MVT VT = Op.getSimpleValueType();
3584 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3585
3586 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3587
3588 SDLoc DL(Op);
3589 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3590
3591 MVT XLenVT = Subtarget.getXLenVT();
3592 unsigned NumElts = Op.getNumOperands();
3593
3594 SDValue DominantValue;
3595 unsigned MostCommonCount = 0;
3596 DenseMap<SDValue, unsigned> ValueCounts;
3597 unsigned NumUndefElts =
3598 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3599
3600 // Track the number of scalar loads we know we'd be inserting, estimated as
3601 // any non-zero floating-point constant. Other kinds of element are either
3602 // already in registers or are materialized on demand. The threshold at which
3603 // a vector load is more desirable than several scalar materialization and
3604 // vector-insertion instructions is not known.
3605 unsigned NumScalarLoads = 0;
3606
3607 for (SDValue V : Op->op_values()) {
3608 if (V.isUndef())
3609 continue;
3610
3611 unsigned &Count = ValueCounts[V];
3612 if (0 == Count)
3613 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3614 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3615
3616 // Is this value dominant? In case of a tie, prefer the highest element as
3617 // it's cheaper to insert near the beginning of a vector than it is at the
3618 // end.
3619 if (++Count >= MostCommonCount) {
3620 DominantValue = V;
3621 MostCommonCount = Count;
3622 }
3623 }
3624
3625 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3626 unsigned NumDefElts = NumElts - NumUndefElts;
3627 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3628
3629 // Don't perform this optimization when optimizing for size, since
3630 // materializing elements and inserting them tends to cause code bloat.
3631 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3632 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3633 ((MostCommonCount > DominantValueCountThreshold) ||
3634 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3635 // Start by splatting the most common element.
3636 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3637
3638 DenseSet<SDValue> Processed{DominantValue};
3639
3640 // We can handle an insert into the last element (of a splat) via
3641 // v(f)slide1down. This is slightly better than the vslideup insert
3642 // lowering as it avoids the need for a vector group temporary. It
3643 // is also better than using vmerge.vx as it avoids the need to
3644 // materialize the mask in a vector register.
3645 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3646 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3647 LastOp != DominantValue) {
3648 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3649 auto OpCode =
3651 if (!VT.isFloatingPoint())
3652 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3653 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3654 LastOp, Mask, VL);
3655 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3656 Processed.insert(LastOp);
3657 }
3658
3659 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3660 for (const auto &OpIdx : enumerate(Op->ops())) {
3661 const SDValue &V = OpIdx.value();
3662 if (V.isUndef() || !Processed.insert(V).second)
3663 continue;
3664 if (ValueCounts[V] == 1) {
3665 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3666 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3667 } else {
3668 // Blend in all instances of this value using a VSELECT, using a
3669 // mask where each bit signals whether that element is the one
3670 // we're after.
3672 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3673 return DAG.getConstant(V == V1, DL, XLenVT);
3674 });
3675 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3676 DAG.getBuildVector(SelMaskTy, DL, Ops),
3677 DAG.getSplatBuildVector(VT, DL, V), Vec);
3678 }
3679 }
3680
3681 return Vec;
3682 }
3683
3684 return SDValue();
3685}
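// --- Editorial illustration (not part of the upstream source) ---------------
// For a build_vector such as <7, 7, 3, 7> the path above splats the dominant
// value 7 and then patches the single outlier 3: an insert at the last lane
// uses v(f)slide1down, any other lane falls back to INSERT_VECTOR_ELT, and a
// repeated non-dominant value is blended in with a masked VSELECT. That is
// usually cheaper than materializing and inserting all four lanes one by one.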
3686
3688 const RISCVSubtarget &Subtarget) {
3689 MVT VT = Op.getSimpleValueType();
3690 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3691
3692 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3693
3694 SDLoc DL(Op);
3695 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3696
3697 MVT XLenVT = Subtarget.getXLenVT();
3698 unsigned NumElts = Op.getNumOperands();
3699
3700 if (VT.getVectorElementType() == MVT::i1) {
3701 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3702 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3703 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3704 }
3705
3706 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3707 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3708 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3709 }
3710
3711 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3712 // scalar integer chunks whose bit-width depends on the number of mask
3713 // bits and XLEN.
3714 // First, determine the most appropriate scalar integer type to use. This
3715 // is at most XLenVT, but may be shrunk to a smaller vector element type
3716 // according to the size of the final vector - use i8 chunks rather than
3717 // XLenVT if we're producing a v8i1. This results in more consistent
3718 // codegen across RV32 and RV64.
3719 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3720 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3721 // If we have to use more than one INSERT_VECTOR_ELT then this
3722 // optimization is likely to increase code size; avoid performing it in
3723 // such a case. We can use a load from a constant pool in this case.
3724 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3725 return SDValue();
3726 // Now we can create our integer vector type. Note that it may be larger
3727 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3728 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3729 MVT IntegerViaVecVT =
3730 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3731 IntegerViaVecElts);
3732
3733 uint64_t Bits = 0;
3734 unsigned BitPos = 0, IntegerEltIdx = 0;
3735 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3736
3737 for (unsigned I = 0; I < NumElts;) {
3738 SDValue V = Op.getOperand(I);
3739 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3740 Bits |= ((uint64_t)BitValue << BitPos);
3741 ++BitPos;
3742 ++I;
3743
3744 // Once we accumulate enough bits to fill our scalar type or process the
3745 // last element, insert into our vector and clear our accumulated data.
3746 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3747 if (NumViaIntegerBits <= 32)
3748 Bits = SignExtend64<32>(Bits);
3749 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3750 Elts[IntegerEltIdx] = Elt;
3751 Bits = 0;
3752 BitPos = 0;
3753 IntegerEltIdx++;
3754 }
3755 }
3756
3757 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3758
3759 if (NumElts < NumViaIntegerBits) {
3760 // If we're producing a smaller vector than our minimum legal integer
3761 // type, bitcast to the equivalent (known-legal) mask type, and extract
3762 // our final mask.
3763 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3764 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3765 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3766 DAG.getConstant(0, DL, XLenVT));
3767 } else {
3768 // Else we must have produced an integer type with the same size as the
3769 // mask type; bitcast for the final result.
3770 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3771 Vec = DAG.getBitcast(VT, Vec);
3772 }
3773
3774 return Vec;
3775 }
3776
3777 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3778 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3780 if (!VT.isFloatingPoint())
3781 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3782 Splat =
3783 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3784 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3785 }
3786
3787 // Try and match index sequences, which we can lower to the vid instruction
3788 // with optional modifications. An all-undef vector is matched by
3789 // getSplatValue, above.
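// For example, <0, 2, 4, 6> lowers to vid.v shifted left by one (step 2),
// while <1, 2, 3, 4> lowers to vid.v plus a splat of the addend 1 (step 1).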
3790 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3791 int64_t StepNumerator = SimpleVID->StepNumerator;
3792 unsigned StepDenominator = SimpleVID->StepDenominator;
3793 int64_t Addend = SimpleVID->Addend;
3794
3795 assert(StepNumerator != 0 && "Invalid step");
3796 bool Negate = false;
3797 int64_t SplatStepVal = StepNumerator;
3798 unsigned StepOpcode = ISD::MUL;
3799 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3800 // anyway as the shift of 63 won't fit in uimm5.
3801 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3802 isPowerOf2_64(std::abs(StepNumerator))) {
3803 Negate = StepNumerator < 0;
3804 StepOpcode = ISD::SHL;
3805 SplatStepVal = Log2_64(std::abs(StepNumerator));
3806 }
3807
3808 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3809 // threshold since it's the immediate value many RVV instructions accept.
3810 // There is no vmul.vi instruction, so ensure the multiply constant can fit
3811 // in a single addi instruction.
3812 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3813 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3814 isPowerOf2_32(StepDenominator) &&
3815 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3816 MVT VIDVT =
3817 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3818 MVT VIDContainerVT =
3819 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3820 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3821 // Convert right out of the scalable type so we can use standard ISD
3822 // nodes for the rest of the computation. If we used scalable types with
3823 // these, we'd lose the fixed-length vector info and generate worse
3824 // vsetvli code.
3825 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3826 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3827 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3828 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3829 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3830 }
3831 if (StepDenominator != 1) {
3832 SDValue SplatStep =
3833 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3834 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3835 }
3836 if (Addend != 0 || Negate) {
3837 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3838 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3839 VID);
3840 }
3841 if (VT.isFloatingPoint()) {
3842 // TODO: Use vfwcvt to reduce register pressure.
3843 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3844 }
3845 return VID;
3846 }
3847 }
3848
3849 // For very small build_vectors, use a single scalar insert of a constant.
3850 // TODO: Base this on constant rematerialization cost, not size.
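// For example, the constant v4i8 <1, 2, 3, 4> can be built by inserting the
// single i32 constant 0x04030201 and bitcasting the 32-bit result to v4i8.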
3851 const unsigned EltBitSize = VT.getScalarSizeInBits();
3852 if (VT.getSizeInBits() <= 32 &&
3853 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3854 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3855 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3856 "Unexpected sequence type");
3857 // If we can use the original VL with the modified element type, this
3858 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3859 // be moved into InsertVSETVLI?
3860 unsigned ViaVecLen =
3861 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3862 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3863
3864 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3865 uint64_t SplatValue = 0;
3866 // Construct the amalgamated value at this larger vector type.
3867 for (const auto &OpIdx : enumerate(Op->op_values())) {
3868 const auto &SeqV = OpIdx.value();
3869 if (!SeqV.isUndef())
3870 SplatValue |=
3871 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3872 }
3873
3874 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3875 // achieve better constant materialization.
3876 // On RV32, we need to sign-extend to use getSignedConstant.
3877 if (ViaIntVT == MVT::i32)
3878 SplatValue = SignExtend64<32>(SplatValue);
3879
3880 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3881 DAG.getUNDEF(ViaVecVT),
3882 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3883 DAG.getVectorIdxConstant(0, DL));
3884 if (ViaVecLen != 1)
3885 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3886 MVT::getVectorVT(ViaIntVT, 1), Vec,
3887 DAG.getConstant(0, DL, XLenVT));
3888 return DAG.getBitcast(VT, Vec);
3889 }
3890
3891
3892 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3893 // when re-interpreted as a vector with a larger element type. For example,
3894 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3895 // could be instead splat as
3896 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3897 // TODO: This optimization could also work on non-constant splats, but it
3898 // would require bit-manipulation instructions to construct the splat value.
3899 SmallVector<SDValue> Sequence;
3900 const auto *BV = cast<BuildVectorSDNode>(Op);
3901 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3902 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3903 BV->getRepeatedSequence(Sequence) &&
3904 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3905 unsigned SeqLen = Sequence.size();
3906 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3907 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3908 ViaIntVT == MVT::i64) &&
3909 "Unexpected sequence type");
3910
3911 // If we can use the original VL with the modified element type, this
3912 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3913 // be moved into InsertVSETVLI?
3914 const unsigned RequiredVL = NumElts / SeqLen;
3915 const unsigned ViaVecLen =
3916 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3917 NumElts : RequiredVL;
3918 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3919
3920 unsigned EltIdx = 0;
3921 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3922 uint64_t SplatValue = 0;
3923 // Construct the amalgamated value which can be splatted as this larger
3924 // vector type.
3925 for (const auto &SeqV : Sequence) {
3926 if (!SeqV.isUndef())
3927 SplatValue |=
3928 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3929 EltIdx++;
3930 }
3931
3932 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3933 // achieve better constant materialization.
3934 // On RV32, we need to sign-extend to use getSignedConstant.
3935 if (ViaIntVT == MVT::i32)
3936 SplatValue = SignExtend64<32>(SplatValue);
3937
3938 // Since we can't introduce illegal i64 types at this stage, we can only
3939 // perform an i64 splat on RV32 if the value is the sign-extension of its
3940 // low 32 bits. That way we can use RVV instructions to splat.
3941 assert((ViaIntVT.bitsLE(XLenVT) ||
3942 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3943 "Unexpected bitcast sequence");
3944 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3945 SDValue ViaVL =
3946 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3947 MVT ViaContainerVT =
3948 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3949 SDValue Splat =
3950 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3951 DAG.getUNDEF(ViaContainerVT),
3952 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3953 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3954 if (ViaVecLen != RequiredVL)
3955 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3956 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3957 DAG.getConstant(0, DL, XLenVT));
3958 return DAG.getBitcast(VT, Splat);
3959 }
3960 }
3961
3962 // If the number of sign bits allows, see if we can lower as a <N x i8>.
3963 // Our main goal here is to reduce LMUL (and thus work) required to
3964 // build the constant, but we will also narrow if the resulting
3965 // narrow vector is known to materialize cheaply.
3966 // TODO: We really should be costing the smaller vector. There are
3967 // profitable cases this misses.
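// For example, a v4i32 of small constants such as <1, -2, 3, -4> can be built
// as a v4i8 and then widened back to v4i32 with a vector sign extension.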
3968 if (EltBitSize > 8 && VT.isInteger() &&
3969 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3970 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3971 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3972 DL, Op->ops());
3973 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3974 Source, DAG, Subtarget);
3975 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3976 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3977 }
3978
3979 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3980 return Res;
3981
3982 // For constant vectors, use generic constant pool lowering. Otherwise,
3983 // we'd have to materialize constants in GPRs just to move them into the
3984 // vector.
3985 return SDValue();
3986}
3987
3988static unsigned getPACKOpcode(unsigned DestBW,
3989 const RISCVSubtarget &Subtarget) {
3990 switch (DestBW) {
3991 default:
3992 llvm_unreachable("Unsupported pack size");
3993 case 16:
3994 return RISCV::PACKH;
3995 case 32:
3996 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3997 case 64:
3998 assert(Subtarget.is64Bit());
3999 return RISCV::PACK;
4000 }
4001}
4002
4003/// Double the element size of the build vector to reduce the number
4004/// of vslide1down in the build vector chain. In the worst case, this
4005/// trades three scalar operations for 1 vector operation. Scalar
4006/// operations are generally lower latency, and for out-of-order cores
4007/// we also benefit from additional parallelism.
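/// For example, a v8i8 build_vector of scalars a..h can instead be built as a
/// v4i16 build_vector of packed pairs such as (b << 8) | a, halving the
/// number of vslide1down steps required.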
4008 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4009 const RISCVSubtarget &Subtarget) {
4010 SDLoc DL(Op);
4011 MVT VT = Op.getSimpleValueType();
4012 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4013 MVT ElemVT = VT.getVectorElementType();
4014 if (!ElemVT.isInteger())
4015 return SDValue();
4016
4017 // TODO: Relax these architectural restrictions, possibly with costing
4018 // of the actual instructions required.
4019 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4020 return SDValue();
4021
4022 unsigned NumElts = VT.getVectorNumElements();
4023 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4024 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4025 NumElts % 2 != 0)
4026 return SDValue();
4027
4028 // Produce [B,A] packed into a type twice as wide. Note that all
4029 // scalars are XLenVT, possibly masked (see below).
4030 MVT XLenVT = Subtarget.getXLenVT();
4031 SDValue Mask = DAG.getConstant(
4032 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4033 auto pack = [&](SDValue A, SDValue B) {
4034 // Bias the scheduling of the inserted operations to near the
4035 // definition of the element - this tends to reduce register
4036 // pressure overall.
4037 SDLoc ElemDL(B);
4038 if (Subtarget.hasStdExtZbkb())
4039 // Note that we're relying on the high bits of the result being
4040 // don't care. For PACKW, the result is *sign* extended.
4041 return SDValue(
4042 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4043 ElemDL, XLenVT, A, B),
4044 0);
4045
4046 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4047 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4048 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4049 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4050 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4051 SDNodeFlags::Disjoint);
4052 };
4053
4054 SmallVector<SDValue> NewOperands;
4055 NewOperands.reserve(NumElts / 2);
4056 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4057 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4058 assert(NumElts == NewOperands.size() * 2);
4059 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4060 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4061 return DAG.getNode(ISD::BITCAST, DL, VT,
4062 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4063}
4064
4065 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4066 const RISCVSubtarget &Subtarget) {
4067 MVT VT = Op.getSimpleValueType();
4068 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4069
4070 MVT EltVT = VT.getVectorElementType();
4071 MVT XLenVT = Subtarget.getXLenVT();
4072
4073 SDLoc DL(Op);
4074
4075 // Proper support for f16 requires Zvfh. bf16 always requires special
4076 // handling. We need to cast the scalar to integer and create an integer
4077 // build_vector.
4078 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4079 MVT IVT = VT.changeVectorElementType(MVT::i16);
4080 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4081 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4082 SDValue Elem = Op.getOperand(I);
4083 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4084 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4085 // Called by LegalizeDAG, we need to use XLenVT operations since we
4086 // can't create illegal types.
4087 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4088 // Manually constant fold so the integer build_vector can be lowered
4089 // better. Waiting for DAGCombine will be too late.
4090 APInt V =
4091 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4092 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4093 } else {
4094 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4095 }
4096 } else {
4097 // Called by scalar type legalizer, we can use i16.
4098 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4099 }
4100 }
4101 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4102 return DAG.getBitcast(VT, Res);
4103 }
4104
4105 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4106 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4107 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4108
4109 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4110
4111 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4112
4113 if (VT.getVectorElementType() == MVT::i1) {
4114 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4115 // vector type, we have a legal equivalently-sized i8 type, so we can use
4116 // that.
4117 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4118 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4119
4120 SDValue WideVec;
4121 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4122 // For a splat, perform a scalar truncate before creating the wider
4123 // vector.
4124 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4125 DAG.getConstant(1, DL, Splat.getValueType()));
4126 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4127 } else {
4128 SmallVector<SDValue, 8> Ops(Op->op_values());
4129 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4130 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4131 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4132 }
4133
4134 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4135 }
4136
4137 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4138 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4139 return Gather;
4140 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4141 : RISCVISD::VMV_V_X_VL;
4142 if (!VT.isFloatingPoint())
4143 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4144 Splat =
4145 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4146 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4147 }
4148
4149 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4150 return Res;
4151
4152 // If we're compiling for an exact VLEN value, we can split our work per
4153 // register in the register group.
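// For example, with VLEN=128 a 256-bit v8i32 build_vector is split into two
// v4i32 build_vectors, each inserted into one register of the LMUL=2 group.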
4154 if (const auto VLen = Subtarget.getRealVLen();
4155 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4156 MVT ElemVT = VT.getVectorElementType();
4157 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4158 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4159 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4160 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4161 assert(M1VT == getLMUL1VT(M1VT));
4162
4163 // The following semantically builds up a fixed length concat_vector
4164 // of the component build_vectors. We eagerly lower to scalable and
4165 // insert_subvector here to avoid DAG combining it back to a large
4166 // build_vector.
4167 SmallVector<SDValue> BuildVectorOps(Op->ops());
4168 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4169 SDValue Vec = DAG.getUNDEF(ContainerVT);
4170 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4171 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4172 SDValue SubBV =
4173 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4174 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4175 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4176 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4177 DAG.getVectorIdxConstant(InsertIdx, DL));
4178 }
4179 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4180 }
4181
4182 // If we're about to resort to vslide1down (or stack usage), pack our
4183 // elements into the widest scalar type we can. This will force a VL/VTYPE
4184 // toggle, but reduces the critical path, the number of vslide1down ops
4185 // required, and possibly enables scalar folds of the values.
4186 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4187 return Res;
4188
4189 // For m1 vectors, if we have non-undef values in both halves of our vector,
4190 // split the vector into low and high halves, build them separately, then
4191 // use a vselect to combine them. For long vectors, this cuts the critical
4192 // path of the vslide1down sequence in half, and gives us an opportunity
4193 // to special case each half independently. Note that we don't change the
4194 // length of the sub-vectors here, so if both fall back to the generic
4195 // vslide1down path, we should be able to fold the vselect into the final
4196 // vslidedown (for the undef tail) for the first half w/ masking.
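// For example, an 8-element vector with all lanes defined is built as two
// build_vectors, one defining only the low four lanes and one only the high
// four, which are then combined with a vselect on a <1,1,1,1,0,0,0,0> mask.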
4197 unsigned NumElts = VT.getVectorNumElements();
4198 unsigned NumUndefElts =
4199 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4200 unsigned NumDefElts = NumElts - NumUndefElts;
4201 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4202 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4203 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4204 SmallVector<SDValue> MaskVals;
4205 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4206 SubVecAOps.reserve(NumElts);
4207 SubVecBOps.reserve(NumElts);
4208 for (unsigned i = 0; i < NumElts; i++) {
4209 SDValue Elem = Op->getOperand(i);
4210 if (i < NumElts / 2) {
4211 SubVecAOps.push_back(Elem);
4212 SubVecBOps.push_back(UndefElem);
4213 } else {
4214 SubVecAOps.push_back(UndefElem);
4215 SubVecBOps.push_back(Elem);
4216 }
4217 bool SelectMaskVal = (i < NumElts / 2);
4218 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4219 }
4220 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4221 MaskVals.size() == NumElts);
4222
4223 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4224 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4225 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4226 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4227 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4228 }
4229
4230 // Cap the cost at a value linear to the number of elements in the vector.
4231 // The default lowering is to use the stack. The vector store + scalar loads
4232 // are linear in VL. However, at high LMULs vslide1down and vslidedown end up
4233 // being (at least) linear in LMUL. As a result, using the vslide1down
4234 // lowering for every element ends up being VL*LMUL.
4235 // TODO: Should we be directly costing the stack alternative? Doing so might
4236 // give us a more accurate upper bound.
4237 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4238
4239 // TODO: unify with TTI getSlideCost.
4240 InstructionCost PerSlideCost = 1;
4241 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4242 default: break;
4243 case RISCVII::VLMUL::LMUL_2:
4244 PerSlideCost = 2;
4245 break;
4246 case RISCVII::VLMUL::LMUL_4:
4247 PerSlideCost = 4;
4248 break;
4249 case RISCVII::VLMUL::LMUL_8:
4250 PerSlideCost = 8;
4251 break;
4252 }
4253
4254 // TODO: Should we be using the build instseq then cost + evaluate scheme
4255 // we use for integer constants here?
4256 unsigned UndefCount = 0;
4257 for (const SDValue &V : Op->ops()) {
4258 if (V.isUndef()) {
4259 UndefCount++;
4260 continue;
4261 }
4262 if (UndefCount) {
4263 LinearBudget -= PerSlideCost;
4264 UndefCount = 0;
4265 }
4266 LinearBudget -= PerSlideCost;
4267 }
4268 if (UndefCount) {
4269 LinearBudget -= PerSlideCost;
4270 }
4271
4272 if (LinearBudget < 0)
4273 return SDValue();
4274
4275 assert((!VT.isFloatingPoint() ||
4276 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4277 "Illegal type which will result in reserved encoding");
4278
4279 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4280
4281 SDValue Vec;
4282 UndefCount = 0;
4283 for (SDValue V : Op->ops()) {
4284 if (V.isUndef()) {
4285 UndefCount++;
4286 continue;
4287 }
4288
4289 // Start our sequence with a TA splat in the hopes that hardware is able to
4290 // recognize there's no dependency on the prior value of our temporary
4291 // register.
4292 if (!Vec) {
4293 Vec = DAG.getSplatVector(VT, DL, V);
4294 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4295 UndefCount = 0;
4296 continue;
4297 }
4298
4299 if (UndefCount) {
4300 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4301 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4302 Vec, Offset, Mask, VL, Policy);
4303 UndefCount = 0;
4304 }
4305 auto OpCode =
4306 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4307 if (!VT.isFloatingPoint())
4308 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4309 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4310 V, Mask, VL);
4311 }
4312 if (UndefCount) {
4313 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4314 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4315 Vec, Offset, Mask, VL, Policy);
4316 }
4317 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4318}
4319
4320static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4321 SDValue Lo, SDValue Hi, SDValue VL,
4322 SelectionDAG &DAG) {
4323 if (!Passthru)
4324 Passthru = DAG.getUNDEF(VT);
4325 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4326 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4327 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4328 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4329 // node in order to try and match RVV vector/scalar instructions.
4330 if ((LoC >> 31) == HiC)
4331 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4332
4333 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4334 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4335 // vlmax vsetvli or vsetivli to change the VL.
4336 // FIXME: Support larger constants?
4337 // FIXME: Support non-constant VLs by saturating?
4338 if (LoC == HiC) {
4339 SDValue NewVL;
4340 if (isAllOnesConstant(VL) ||
4341 (isa<RegisterSDNode>(VL) &&
4342 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4343 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4344 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4345 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4346
4347 if (NewVL) {
4348 MVT InterVT =
4349 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4350 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4351 DAG.getUNDEF(InterVT), Lo, NewVL);
4352 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4353 }
4354 }
4355 }
4356
4357 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4358 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4359 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4360 Hi.getConstantOperandVal(1) == 31)
4361 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4362
4363 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4364 // even if it might be sign extended.
4365 if (Hi.isUndef())
4366 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4367
4368 // Fall back to a stack store and stride x0 vector load.
4369 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4370 Hi, VL);
4371}
4372
4373// Called by type legalization to handle splat of i64 on RV32.
4374// FIXME: We can optimize this when the type has sign or zero bits in one
4375// of the halves.
4376static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4377 SDValue Scalar, SDValue VL,
4378 SelectionDAG &DAG) {
4379 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4380 SDValue Lo, Hi;
4381 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4382 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4383}
4384
4385// This function lowers a splat of a scalar operand Splat with the vector
4386// length VL. It ensures the final sequence is type legal, which is useful when
4387// lowering a splat after type legalization.
4388static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4389 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4390 const RISCVSubtarget &Subtarget) {
4391 bool HasPassthru = Passthru && !Passthru.isUndef();
4392 if (!HasPassthru && !Passthru)
4393 Passthru = DAG.getUNDEF(VT);
4394
4395 MVT EltVT = VT.getVectorElementType();
4396 MVT XLenVT = Subtarget.getXLenVT();
4397
4398 if (VT.isFloatingPoint()) {
4399 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4400 EltVT == MVT::bf16) {
4401 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4402 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4403 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4404 else
4405 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4406 MVT IVT = VT.changeVectorElementType(MVT::i16);
4407 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4408 SDValue Splat =
4409 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4410 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4411 }
4412 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4413 }
4414
4415 // Simplest case is that the operand needs to be promoted to XLenVT.
4416 if (Scalar.getValueType().bitsLE(XLenVT)) {
4417 // If the operand is a constant, sign extend to increase our chances
4418 // of being able to use a .vi instruction. ANY_EXTEND would become a
4419 // a zero extend and the simm5 check in isel would fail.
4420 // FIXME: Should we ignore the upper bits in isel instead?
4421 unsigned ExtOpc =
4422 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4423 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4424 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4425 }
4426
4427 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4428 "Unexpected scalar for splat lowering!");
4429
4430 if (isOneConstant(VL) && isNullConstant(Scalar))
4431 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4432 DAG.getConstant(0, DL, XLenVT), VL);
4433
4434 // Otherwise use the more complicated splatting algorithm.
4435 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4436}
4437
4438// This function lowers an insert of a scalar operand Scalar into lane
4439// 0 of the vector regardless of the value of VL. The contents of the
4440// remaining lanes of the result vector are unspecified. VL is assumed
4441// to be non-zero.
4442 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4443 const SDLoc &DL, SelectionDAG &DAG,
4444 const RISCVSubtarget &Subtarget) {
4445 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4446
4447 const MVT XLenVT = Subtarget.getXLenVT();
4448 SDValue Passthru = DAG.getUNDEF(VT);
4449
4450 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4451 isNullConstant(Scalar.getOperand(1))) {
4452 SDValue ExtractedVal = Scalar.getOperand(0);
4453 // The element types must be the same.
4454 if (ExtractedVal.getValueType().getVectorElementType() ==
4455 VT.getVectorElementType()) {
4456 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4457 MVT ExtractedContainerVT = ExtractedVT;
4458 if (ExtractedContainerVT.isFixedLengthVector()) {
4459 ExtractedContainerVT = getContainerForFixedLengthVector(
4460 DAG, ExtractedContainerVT, Subtarget);
4461 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4462 ExtractedVal, DAG, Subtarget);
4463 }
4464 if (ExtractedContainerVT.bitsLE(VT))
4465 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4466 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4467 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4468 DAG.getVectorIdxConstant(0, DL));
4469 }
4470 }
4471
4472
4473 if (VT.isFloatingPoint())
4474 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4475 DAG.getUNDEF(VT), Scalar, VL);
4476
4477 // Avoid the tricky legalization cases by falling back to using the
4478 // splat code which already handles it gracefully.
4479 if (!Scalar.getValueType().bitsLE(XLenVT))
4480 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4481 DAG.getConstant(1, DL, XLenVT),
4482 VT, DL, DAG, Subtarget);
4483
4484 // If the operand is a constant, sign extend to increase our chances
4485 // of being able to use a .vi instruction. ANY_EXTEND would become a
4486 // zero extend and the simm5 check in isel would fail.
4487 // FIXME: Should we ignore the upper bits in isel instead?
4488 unsigned ExtOpc =
4489 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4490 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4491 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4492 VL);
4493}
4494
4495// Can this shuffle be performed on exactly one (possibly larger) input?
4496static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
4497 SDValue V2) {
4498
4499 if (V2.isUndef() &&
4500 RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
4501 return V1;
4502
4503 // Both inputs must be extracts.
4504 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4505 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4506 return SDValue();
4507
4508 // Extracting from the same source.
4509 SDValue Src = V1.getOperand(0);
4510 if (Src != V2.getOperand(0))
4511 return SDValue();
4512
4513 // Src needs to have twice the number of elements.
4514 unsigned NumElts = VT.getVectorNumElements();
4515 if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
4516 return SDValue();
4517
4518 // The extracts must extract the two halves of the source.
4519 if (V1.getConstantOperandVal(1) != 0 ||
4520 V2.getConstantOperandVal(1) != NumElts)
4521 return SDValue();
4522
4523 return Src;
4524}
4525
4526/// Is this shuffle interleaving contiguous elements from one vector into the
4527/// even elements and contiguous elements from another vector into the odd
4528/// elements. \p EvenSrc will contain the element that should be in the first
4529/// even element. \p OddSrc will contain the element that should be in the first
4530/// odd element. These can be the first element in a source or the element half
4531/// way through the source.
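/// For example, the v8i8 mask <0, 8, 1, 9, 2, 10, 3, 11> interleaves the low
/// halves of two sources, giving EvenSrc = 0 and OddSrc = 8.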
4532static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4533 int &OddSrc, const RISCVSubtarget &Subtarget) {
4534 // We need to be able to widen elements to the next larger integer type.
4535 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4536 return false;
4537
4538 int Size = Mask.size();
4539 int NumElts = VT.getVectorNumElements();
4540 assert(Size == (int)NumElts && "Unexpected mask size");
4541
4542 SmallVector<unsigned, 2> StartIndexes;
4543 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4544 return false;
4545
4546 EvenSrc = StartIndexes[0];
4547 OddSrc = StartIndexes[1];
4548
4549 // One source should be low half of first vector.
4550 if (EvenSrc != 0 && OddSrc != 0)
4551 return false;
4552
4553 // Subvectors will be extracted from either the start of the two input
4554 // vectors, or from the start and middle of the first vector if it's a unary
4555 // interleave.
4556 // In both cases, HalfNumElts will be extracted.
4557 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4558 // we'll create an illegal extract_subvector.
4559 // FIXME: We could support other values using a slidedown first.
4560 int HalfNumElts = NumElts / 2;
4561 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4562}
4563
4564/// Match shuffles that concatenate two vectors, rotate the concatenation,
4565/// and then extract the original number of elements from the rotated result.
4566/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4567/// returned rotation amount is for a rotate right, where elements move from
4568/// higher elements to lower elements. \p LoSrc indicates the first source
4569/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4570/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4571/// 0 or 1 if a rotation is found.
4572///
4573/// NOTE: We talk about rotate to the right which matches how bit shift and
4574/// rotate instructions are described where LSBs are on the right, but LLVM IR
4575/// and the table below write vectors with the lowest elements on the left.
4576static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4577 int Size = Mask.size();
4578
4579 // We need to detect various ways of spelling a rotation:
4580 // [11, 12, 13, 14, 15, 0, 1, 2]
4581 // [-1, 12, 13, 14, -1, -1, 1, -1]
4582 // [-1, -1, -1, -1, -1, -1, 1, 2]
4583 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4584 // [-1, 4, 5, 6, -1, -1, 9, -1]
4585 // [-1, 4, 5, 6, -1, -1, -1, -1]
4586 int Rotation = 0;
4587 LoSrc = -1;
4588 HiSrc = -1;
4589 for (int i = 0; i != Size; ++i) {
4590 int M = Mask[i];
4591 if (M < 0)
4592 continue;
4593
4594 // Determine where a rotate vector would have started.
4595 int StartIdx = i - (M % Size);
4596 // The identity rotation isn't interesting, stop.
4597 if (StartIdx == 0)
4598 return -1;
4599
4600 // If we found the tail of a vector the rotation must be the missing
4601 // front. If we found the head of a vector, it must be how much of the
4602 // head.
4603 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4604
4605 if (Rotation == 0)
4606 Rotation = CandidateRotation;
4607 else if (Rotation != CandidateRotation)
4608 // The rotations don't match, so we can't match this mask.
4609 return -1;
4610
4611 // Compute which value this mask is pointing at.
4612 int MaskSrc = M < Size ? 0 : 1;
4613
4614 // Compute which of the two target values this index should be assigned to.
4615 // This reflects whether the high elements are remaining or the low elements
4616 // are remaining.
4617 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4618
4619 // Either set up this value if we've not encountered it before, or check
4620 // that it remains consistent.
4621 if (TargetSrc < 0)
4622 TargetSrc = MaskSrc;
4623 else if (TargetSrc != MaskSrc)
4624 // This may be a rotation, but it pulls from the inputs in some
4625 // unsupported interleaving.
4626 return -1;
4627 }
4628
4629 // Check that we successfully analyzed the mask, and normalize the results.
4630 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4631 assert((LoSrc >= 0 || HiSrc >= 0) &&
4632 "Failed to find a rotated input vector!");
4633
4634 return Rotation;
4635}
4636
4637// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4638// 2, 4, 8 and the integer type Factor-times larger than VT's
4639// element type must be a legal element type.
4640// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4641// -> [p, q, r, s] (Factor=2, Index=1)
4642 static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4643 SDValue Src, unsigned Factor,
4644 unsigned Index, SelectionDAG &DAG) {
4645 unsigned EltBits = VT.getScalarSizeInBits();
4646 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4647 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4648 SrcEC.divideCoefficientBy(Factor));
4649 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4650 SrcEC.divideCoefficientBy(Factor));
4651 Src = DAG.getBitcast(WideSrcVT, Src);
4652
4653 unsigned Shift = Index * EltBits;
4654 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4655 DAG.getConstant(Shift, DL, WideSrcVT));
4656 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4657 MVT IntVT = VT.changeVectorElementTypeToInteger();
4658 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
4659 DAG.getVectorIdxConstant(0, DL));
4660 return DAG.getBitcast(VT, Res);
4661}
4662
4663// Lower the following shuffle to vslidedown.
4664// a)
4665// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4666// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4667// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4668// b)
4669// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4670// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4671// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4672// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4673// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4674// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4675 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4676 SDValue V1, SDValue V2,
4677 ArrayRef<int> Mask,
4678 const RISCVSubtarget &Subtarget,
4679 SelectionDAG &DAG) {
4680 auto findNonEXTRACT_SUBVECTORParent =
4681 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4682 uint64_t Offset = 0;
4683 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4684 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4685 // a scalable vector. But we don't want to match the case.
4686 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4687 Offset += Parent.getConstantOperandVal(1);
4688 Parent = Parent.getOperand(0);
4689 }
4690 return std::make_pair(Parent, Offset);
4691 };
4692
4693 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4694 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4695
4696 // Extracting from the same source.
4697 SDValue Src = V1Src;
4698 if (Src != V2Src)
4699 return SDValue();
4700
4701 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4702 SmallVector<int, 16> NewMask(Mask);
4703 for (size_t i = 0; i != NewMask.size(); ++i) {
4704 if (NewMask[i] == -1)
4705 continue;
4706
4707 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4708 NewMask[i] = NewMask[i] + V1IndexOffset;
4709 } else {
4710 // Minus NewMask.size() is needed. Otherwise, the b case would be
4711 // <5,6,7,12> instead of <5,6,7,8>.
4712 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4713 }
4714 }
4715
4716 // First index must be known and non-zero. It will be used as the slidedown
4717 // amount.
4718 if (NewMask[0] <= 0)
4719 return SDValue();
4720
4721 // NewMask must also be contiguous.
4722 for (unsigned i = 1; i != NewMask.size(); ++i)
4723 if (NewMask[i - 1] + 1 != NewMask[i])
4724 return SDValue();
4725
4726 MVT XLenVT = Subtarget.getXLenVT();
4727 MVT SrcVT = Src.getSimpleValueType();
4728 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4729 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4730 SDValue Slidedown =
4731 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4732 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4733 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4734 return DAG.getNode(
4735 ISD::EXTRACT_SUBVECTOR, DL, VT,
4736 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4737 DAG.getConstant(0, DL, XLenVT));
4738}
4739
4740// Because vslideup leaves the destination elements at the start intact, we can
4741// use it to perform shuffles that insert subvectors:
4742//
4743// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4744// ->
4745// vsetvli zero, 8, e8, mf2, ta, ma
4746// vslideup.vi v8, v9, 4
4747//
4748// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4749// ->
4750// vsetvli zero, 5, e8, mf2, tu, ma
4751 // vslideup.vi v8, v9, 2
4752 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4753 SDValue V1, SDValue V2,
4754 ArrayRef<int> Mask,
4755 const RISCVSubtarget &Subtarget,
4756 SelectionDAG &DAG) {
4757 unsigned NumElts = VT.getVectorNumElements();
4758 int NumSubElts, Index;
4759 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4760 Index))
4761 return SDValue();
4762
4763 bool OpsSwapped = Mask[Index] < (int)NumElts;
4764 SDValue InPlace = OpsSwapped ? V2 : V1;
4765 SDValue ToInsert = OpsSwapped ? V1 : V2;
4766
4767 MVT XLenVT = Subtarget.getXLenVT();
4768 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4769 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4770 // We slide up by the index that the subvector is being inserted at, and set
4771 // VL to the index + the number of elements being inserted.
4772 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4773 // If we're adding a suffix to the in-place vector, i.e. inserting right
4774 // up to the very end of it, then we don't actually care about the tail.
4775 if (NumSubElts + Index >= (int)NumElts)
4776 Policy |= RISCVII::TAIL_AGNOSTIC;
4777
4778 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4779 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4780 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4781
4782 SDValue Res;
4783 // If we're inserting into the lowest elements, use a tail undisturbed
4784 // vmv.v.v.
4785 if (Index == 0)
4786 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4787 VL);
4788 else
4789 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4790 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4791 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4792}
4793
4794/// Match v(f)slide1up/down idioms. These operations involve sliding
4795/// N-1 elements to make room for an inserted scalar at one end.
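/// For example, with four elements and V1 a splat build_vector, the mask
/// <0, 4, 5, 6> matches a vslide1up of V2 while <5, 6, 7, 0> matches a
/// vslide1down of V2.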
4796 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4797 SDValue V1, SDValue V2,
4798 ArrayRef<int> Mask,
4799 const RISCVSubtarget &Subtarget,
4800 SelectionDAG &DAG) {
4801 bool OpsSwapped = false;
4802 if (!isa<BuildVectorSDNode>(V1)) {
4803 if (!isa<BuildVectorSDNode>(V2))
4804 return SDValue();
4805 std::swap(V1, V2);
4806 OpsSwapped = true;
4807 }
4808 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4809 if (!Splat)
4810 return SDValue();
4811
4812 // Return true if the mask could describe a slide of Mask.size() - 1
4813 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4814 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4815 const unsigned S = (Offset > 0) ? 0 : -Offset;
4816 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4817 for (unsigned i = S; i != E; ++i)
4818 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4819 return false;
4820 return true;
4821 };
4822
4823 const unsigned NumElts = VT.getVectorNumElements();
4824 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4825 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4826 return SDValue();
4827
4828 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4829 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4830 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4831 return SDValue();
4832
4833 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4834 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4835
4836 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
4837 // vslide1{down,up}.vx instead.
4838 if (VT.getVectorElementType() == MVT::bf16 ||
4839 (VT.getVectorElementType() == MVT::f16 &&
4840 !Subtarget.hasVInstructionsF16())) {
4841 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
4842 Splat =
4843 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
4844 V2 = DAG.getBitcast(
4845 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
4846 SDValue Vec = DAG.getNode(
4847 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
4848 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
4849 Vec = DAG.getBitcast(ContainerVT, Vec);
4850 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4851 }
4852
4853 auto OpCode = IsVSlidedown ?
4854 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL)
4855 : (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4856 if (!VT.isFloatingPoint())
4857 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4858 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4859 DAG.getUNDEF(ContainerVT),
4860 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4861 Splat, TrueMask, VL);
4862 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4863}
4864
4865// Match a mask which "spreads" the leading elements of a vector evenly
4866// across the result. Factor is the spread amount, and Index is the
4867 // offset applied (on success, Index < Factor). This is the inverse
4868// of a deinterleave with the same Factor and Index. This is analogous
4869// to an interleave, except that all but one lane is undef.
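// For example, <-1, 0, -1, 1, -1, 2, -1, 3> is a spread mask with Factor = 2
// and Index = 1.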
4870static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
4871 SmallVector<bool> LaneIsUndef(Factor, true);
4872 for (unsigned i = 0; i < Mask.size(); i++)
4873 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4874
4875 bool Found = false;
4876 for (unsigned i = 0; i < Factor; i++) {
4877 if (LaneIsUndef[i])
4878 continue;
4879 if (Found)
4880 return false;
4881 Index = i;
4882 Found = true;
4883 }
4884 if (!Found)
4885 return false;
4886
4887 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4888 unsigned j = i * Factor + Index;
4889 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4890 return false;
4891 }
4892 return true;
4893}
4894
4895// Given a vector a, b, c, d return a vector Factor times longer
4896// with Factor-1 undef's between elements. Ex:
4897// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4898// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4899static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4900 const SDLoc &DL, SelectionDAG &DAG) {
4901
4902 MVT VT = V.getSimpleValueType();
4903 unsigned EltBits = VT.getScalarSizeInBits();
4904 ElementCount EC = VT.getVectorElementCount();
4905 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4906
4907 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4908
4909 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4910 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4911 // allow the SHL to fold away if Index is 0.
4912 if (Index != 0)
4913 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4914 DAG.getConstant(EltBits * Index, DL, WideVT));
4915 // Make sure to use the original element type.
4916 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4917 EC.multiplyCoefficientBy(Factor));
4918 return DAG.getBitcast(ResultVT, Result);
4919}
4920
4921// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4922// to create an interleaved vector of <[vscale x] n*2 x ty>.
4923// This requires that the size of ty is less than the subtarget's maximum ELEN.
4924 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4925 const SDLoc &DL, SelectionDAG &DAG,
4926 const RISCVSubtarget &Subtarget) {
4927
4928 // FIXME: Not only does this optimize the code, it fixes some correctness
4929 // issues because MIR does not have freeze.
4930 if (EvenV.isUndef())
4931 return getWideningSpread(OddV, 2, 1, DL, DAG);
4932 if (OddV.isUndef())
4933 return getWideningSpread(EvenV, 2, 0, DL, DAG);
4934
4935 MVT VecVT = EvenV.getSimpleValueType();
4936 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4937 // Convert fixed vectors to scalable if needed
4938 if (VecContainerVT.isFixedLengthVector()) {
4939 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4940 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4941 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4942 }
4943
4944 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4945
4946 // We're working with a vector of the same size as the resulting
4947 // interleaved vector, but with half the number of elements and
4948 // twice the SEW (Hence the restriction on not using the maximum
4949 // ELEN)
4950 MVT WideVT =
4951 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4952 VecVT.getVectorElementCount());
4953 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4954 if (WideContainerVT.isFixedLengthVector())
4955 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4956
4957 // Bitcast the input vectors to integers in case they are FP
4958 VecContainerVT = VecContainerVT.changeTypeToInteger();
4959 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4960 OddV = DAG.getBitcast(VecContainerVT, OddV);
4961
4962 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4963 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4964
4965 SDValue Interleaved;
4966 if (Subtarget.hasStdExtZvbb()) {
4967 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4968 SDValue OffsetVec =
4969 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4970 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4971 OffsetVec, Passthru, Mask, VL);
4972 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4973 Interleaved, EvenV, Passthru, Mask, VL);
4974 } else {
4975 // FIXME: We should freeze the odd vector here. We already handled the case
4976 // of provably undef/poison above.
4977
4978 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4979 // vwaddu.vv
4980 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4981 OddV, Passthru, Mask, VL);
4982
4983 // Then multiply OddV by 2^(VecVT.getScalarSizeInBits()) - 1 (i.e. 0xff...ff)
4984 SDValue AllOnesVec = DAG.getSplatVector(
4985 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4986 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4987 OddV, AllOnesVec, Passthru, Mask, VL);
4988
4989 // Add the two together so we get
4990 // (OddV * 0xff...ff) + (OddV + EvenV)
4991 // = (OddV * 0x100...00) + EvenV
4992 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4993 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4994 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4995 Interleaved, OddsMul, Passthru, Mask, VL);
4996 }
4997
4998 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
4999 MVT ResultContainerVT = MVT::getVectorVT(
5000 VecVT.getVectorElementType(), // Make sure to use original type
5001 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5002 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5003
5004 // Convert back to a fixed vector if needed
5005 MVT ResultVT =
5006 MVT::getVectorVT(VecVT.getVectorElementType(),
5007 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5008 if (ResultVT.isFixedLengthVector())
5009 Interleaved =
5010 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5011
5012 return Interleaved;
5013}
5014
5015// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5016// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5017 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5018 SelectionDAG &DAG,
5019 const RISCVSubtarget &Subtarget) {
5020 SDLoc DL(SVN);
5021 MVT VT = SVN->getSimpleValueType(0);
5022 SDValue V = SVN->getOperand(0);
5023 unsigned NumElts = VT.getVectorNumElements();
5024
5025 assert(VT.getVectorElementType() == MVT::i1);
5026
5027 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5028 SVN->getMask().size()) ||
5029 !SVN->getOperand(1).isUndef())
5030 return SDValue();
5031
5032 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5033 EVT ViaVT = EVT::getVectorVT(
5034 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5035 EVT ViaBitVT =
5036 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5037
5038 // If we don't have zvbb or the larger element type > ELEN, the operation will
5039 // be illegal.
5040 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5041 ViaVT) ||
5042 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5043 return SDValue();
5044
5045 // If the bit vector doesn't fit exactly into the larger element type, we need
5046 // to insert it into the larger vector and then shift up the reversed bits
5047 // afterwards to get rid of the gap introduced.
5048 if (ViaEltSize > NumElts)
5049 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
5050 V, DAG.getVectorIdxConstant(0, DL));
5051
5052 SDValue Res =
5053 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5054
5055 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5056 // element type.
5057 if (ViaEltSize > NumElts)
5058 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5059 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5060
5061 Res = DAG.getBitcast(ViaBitVT, Res);
5062
5063 if (ViaEltSize > NumElts)
5064 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
5065 DAG.getVectorIdxConstant(0, DL));
5066 return Res;
5067}
5068
5069 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
5070 SelectionDAG &DAG,
5071 const RISCVSubtarget &Subtarget,
5072 MVT &RotateVT, unsigned &RotateAmt) {
5073 SDLoc DL(SVN);
5074
5075 EVT VT = SVN->getValueType(0);
5076 unsigned NumElts = VT.getVectorNumElements();
5077 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5078 unsigned NumSubElts;
5079 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
5080 NumElts, NumSubElts, RotateAmt))
5081 return false;
5082 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5083 NumElts / NumSubElts);
5084
5085 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5086 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5087}
5088
5089// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5090// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5091// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5092 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5093 SelectionDAG &DAG,
5094 const RISCVSubtarget &Subtarget) {
5095 SDLoc DL(SVN);
5096
5097 EVT VT = SVN->getValueType(0);
5098 unsigned RotateAmt;
5099 MVT RotateVT;
5100 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5101 return SDValue();
5102
5103 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5104
5105 SDValue Rotate;
5106 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5107 // so canonicalize to vrev8.
5108 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5109 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5110 else
5111 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5112 DAG.getConstant(RotateAmt, DL, RotateVT));
5113
5114 return DAG.getBitcast(VT, Rotate);
5115}
5116
5117// If compiling with an exactly known VLEN, see if we can split a
5118// shuffle on m2 or larger into a small number of m1 sized shuffles
5119// which write each destination registers exactly once.
5120 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5121 SelectionDAG &DAG,
5122 const RISCVSubtarget &Subtarget) {
5123 SDLoc DL(SVN);
5124 MVT VT = SVN->getSimpleValueType(0);
5125 SDValue V1 = SVN->getOperand(0);
5126 SDValue V2 = SVN->getOperand(1);
5127 ArrayRef<int> Mask = SVN->getMask();
5128
5129 // If we don't know exact data layout, not much we can do. If this
5130 // is already m1 or smaller, no point in splitting further.
5131 const auto VLen = Subtarget.getRealVLen();
5132 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5133 return SDValue();
5134
5135 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5136 // expansion for.
5137 unsigned RotateAmt;
5138 MVT RotateVT;
5139 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5140 return SDValue();
5141
5142 MVT ElemVT = VT.getVectorElementType();
5143 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5144
5145 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5146 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5147 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5148 assert(M1VT == getLMUL1VT(M1VT));
5149 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5150 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5151 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5152 unsigned NumOfDestRegs = NumElts / NumOpElts;
5153 // The following semantically builds up a fixed length concat_vector
5154 // of the component shuffle_vectors. We eagerly lower to scalable here
5155 // to avoid DAG combining it back to a large shuffle_vector again.
5156 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5157 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5158 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5159 Operands;
5160 processShuffleMasks(
5161 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5162 [&]() { Operands.emplace_back(); },
5163 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5164 Operands.emplace_back().emplace_back(
5165 SrcVecIdx, UINT_MAX,
5166 SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5167 },
5168 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5169 if (NewReg)
5170 Operands.emplace_back();
5171 Operands.back().emplace_back(
5172 Idx1, Idx2, SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5173 });
5174 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5175 // Note: check that we do not emit too many shuffles here to prevent code
5176 // size explosion.
5177 // TODO: investigate whether this can be improved by extra analysis of the
5178 // masks to check if the code is more profitable.
5179 unsigned NumShuffles = std::accumulate(
5180 Operands.begin(), Operands.end(), 0u,
5181 [&](unsigned N,
5182 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5183 if (Data.empty())
5184 return N;
5185 N += Data.size();
5186 for (const auto &P : Data) {
5187 unsigned Idx2 = std::get<1>(P);
5188 ArrayRef<int> Mask = std::get<2>(P);
5189 if (Idx2 != UINT_MAX)
5190 ++N;
5191 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5192 --N;
5193 }
5194 return N;
5195 });
5196 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5197 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5198 return SDValue();
5199 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5200 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5201 DAG.getVectorIdxConstant(ExtractIdx, DL));
5202 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5203 return SubVec;
5204 };
5205 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5206 ArrayRef<int> Mask) {
5207 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5208 return SubVec;
5209 };
5210 SDValue Vec = DAG.getUNDEF(ContainerVT);
5211 for (auto [I, Data] : enumerate(Operands)) {
5212 if (Data.empty())
5213 continue;
5214 SmallDenseMap<unsigned, SDValue, 4> Values;
5215 for (unsigned I : seq<unsigned>(Data.size())) {
5216 const auto &[Idx1, Idx2, _] = Data[I];
5217 if (Values.contains(Idx1)) {
5218 assert(Idx2 != UINT_MAX && Values.contains(Idx2) &&
5219 "Expected both indices to be extracted already.");
5220 break;
5221 }
5222 SDValue V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5223 (Idx1 % NumOfSrcRegs) * NumOpElts);
5224 Values[Idx1] = V;
5225 if (Idx2 != UINT_MAX)
5226 Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5227 (Idx2 % NumOfSrcRegs) * NumOpElts);
5228 }
5229 SDValue V;
5230 for (const auto &[Idx1, Idx2, Mask] : Data) {
5231 SDValue V1 = Values.at(Idx1);
5232 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5233 V = PerformShuffle(V1, V2, Mask);
5234 Values[Idx1] = V;
5235 }
5236
5237 unsigned InsertIdx = I * NumOpElts;
5238 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5239 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, V,
5240 DAG.getVectorIdxConstant(InsertIdx, DL));
5241 }
5242 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5243}
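// For example, with an exact VLEN of 128 a v8i64 shuffle occupies an m4
// register group; the loop above extracts the m1 (v2i64) sources each
// destination register needs, shuffles them, and inserts the results back at
// 2-element offsets. The NumShuffles heuristic bails out if this would expand
// into too many m1 shuffles.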
5244
5245// Matches a subset of compress masks with a contiguous prefix of output
5246// elements. This could be extended to allow gaps by deciding which
5247// source elements to spuriously demand.
5248static bool isCompressMask(ArrayRef<int> Mask) {
5249 int Last = -1;
5250 bool SawUndef = false;
5251 for (unsigned i = 0; i < Mask.size(); i++) {
5252 if (Mask[i] == -1) {
5253 SawUndef = true;
5254 continue;
5255 }
5256 if (SawUndef)
5257 return false;
5258 if (i > (unsigned)Mask[i])
5259 return false;
5260 if (Mask[i] <= Last)
5261 return false;
5262 Last = Mask[i];
5263 }
5264 return true;
5265}
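// For example, <1, 3, -1, -1> is a compress mask (the defined elements are
// strictly increasing, each Mask[i] >= i, and undefs only appear as a suffix),
// while <1, -1, 3, -1> and <2, 1, 3, -1> are rejected.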
5266
5267/// Given a shuffle where the indices are disjoint between the two sources,
5268/// e.g.:
5269///
5270/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5271///
5272/// Merge the two sources into one and do a single source shuffle:
5273///
5274/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5275/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5276///
5277/// A vselect will either be merged into a masked instruction or be lowered as a
5278/// vmerge.vvm, which is cheaper than a vrgather.vv.
5279static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5280 SelectionDAG &DAG,
5281 const RISCVSubtarget &Subtarget) {
5282 MVT VT = SVN->getSimpleValueType(0);
5283 MVT XLenVT = Subtarget.getXLenVT();
5284 SDLoc DL(SVN);
5285
5286 const ArrayRef<int> Mask = SVN->getMask();
5287
5288 // Work out which source each lane will come from.
5289 SmallVector<int, 16> Srcs(Mask.size(), -1);
5290
5291 for (int Idx : Mask) {
5292 if (Idx == -1)
5293 continue;
5294 unsigned SrcIdx = Idx % Mask.size();
5295 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5296 if (Srcs[SrcIdx] == -1)
5297 // Mark this source as using this lane.
5298 Srcs[SrcIdx] = Src;
5299 else if (Srcs[SrcIdx] != Src)
5300 // The other source is using this lane: not disjoint.
5301 return SDValue();
5302 }
5303
5304 SmallVector<SDValue> SelectMaskVals;
5305 for (int Lane : Srcs) {
5306 if (Lane == -1)
5307 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5308 else
5309 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5310 }
5311 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5312 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5313 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5314 SVN->getOperand(0), SVN->getOperand(1));
5315
5316 // Move all indices relative to the first source.
5317 SmallVector<int> NewMask(Mask.size());
5318 for (unsigned I = 0; I < Mask.size(); I++) {
5319 if (Mask[I] == -1)
5320 NewMask[I] = -1;
5321 else
5322 NewMask[I] = Mask[I] % Mask.size();
5323 }
5324
5325 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5326}
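// In the <2, 7, 1, 4> example above, the indices modulo the mask size are
// <2, 3, 1, 0>: lane positions 1 and 2 are only demanded from t0 and lanes 0
// and 3 only from t1, so the sources are disjoint. A lane-wise vselect merges
// the two operands and the single-source shuffle <2, 3, 1, 0> then performs
// the permutation.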
5327
5328/// Try to widen element type to get a new mask value for a better permutation
5329/// sequence. This doesn't try to inspect the widened mask for profitability;
5330/// we speculate the widened form is equal or better. This has the effect of
5331/// reducing mask constant sizes - allowing cheaper materialization sequences
5332/// - and index sequence sizes - reducing register pressure and materialization
5333/// cost, at the cost of (possibly) an extra VTYPE toggle.
5334static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5335 SDLoc DL(Op);
5336 MVT VT = Op.getSimpleValueType();
5337 MVT ScalarVT = VT.getVectorElementType();
5338 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5339 SDValue V0 = Op.getOperand(0);
5340 SDValue V1 = Op.getOperand(1);
5341 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5342
5343 // Avoid wasted work leading to isTypeLegal check failing below
5344 if (ElementSize > 32)
5345 return SDValue();
5346
5347 SmallVector<int, 8> NewMask;
5348 if (!widenShuffleMaskElts(Mask, NewMask))
5349 return SDValue();
5350
5351 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5352 : MVT::getIntegerVT(ElementSize * 2);
5353 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5354 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5355 return SDValue();
5356 V0 = DAG.getBitcast(NewVT, V0);
5357 V1 = DAG.getBitcast(NewVT, V1);
5358 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5359}
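// For example, a v4i32 shuffle with mask <2, 3, 0, 1> widens to a v2i64
// shuffle with mask <1, 0>, halving the number of indices that need to be
// materialized.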
5360
5361static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5362 const RISCVSubtarget &Subtarget) {
5363 SDValue V1 = Op.getOperand(0);
5364 SDValue V2 = Op.getOperand(1);
5365 SDLoc DL(Op);
5366 MVT XLenVT = Subtarget.getXLenVT();
5367 MVT VT = Op.getSimpleValueType();
5368 unsigned NumElts = VT.getVectorNumElements();
5369 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5370
5371 if (VT.getVectorElementType() == MVT::i1) {
5372 // Lower to a vror.vi of a larger element type if possible before we promote
5373 // i1s to i8s.
5374 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5375 return V;
5376 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5377 return V;
5378
5379 // Promote i1 shuffle to i8 shuffle.
5380 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5381 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5382 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5383 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5384 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5385 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5386 ISD::SETNE);
5387 }
5388
5389 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5390
5391 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5392
5393 if (SVN->isSplat()) {
5394 const int Lane = SVN->getSplatIndex();
5395 if (Lane >= 0) {
5396 MVT SVT = VT.getVectorElementType();
5397
5398 // Turn splatted vector load into a strided load with an X0 stride.
5399 SDValue V = V1;
5400 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5401 // with undef.
5402 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5403 int Offset = Lane;
5404 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5405 int OpElements =
5406 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5407 V = V.getOperand(Offset / OpElements);
5408 Offset %= OpElements;
5409 }
5410
5411 // We need to ensure the load isn't atomic or volatile.
5412 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5413 auto *Ld = cast<LoadSDNode>(V);
5414 Offset *= SVT.getStoreSize();
5415 SDValue NewAddr = DAG.getMemBasePlusOffset(
5416 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5417
5418 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5419 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5420 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5421 SDValue IntID =
5422 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5423 SDValue Ops[] = {Ld->getChain(),
5424 IntID,
5425 DAG.getUNDEF(ContainerVT),
5426 NewAddr,
5427 DAG.getRegister(RISCV::X0, XLenVT),
5428 VL};
5429 SDValue NewLoad = DAG.getMemIntrinsicNode(
5430 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5431 DAG.getMachineFunction().getMachineMemOperand(
5432 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5433 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5434 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5435 }
5436
5437 MVT SplatVT = ContainerVT;
5438
5439 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5440 if (SVT == MVT::bf16 ||
5441 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5442 SVT = MVT::i16;
5443 SplatVT = ContainerVT.changeVectorElementType(SVT);
5444 }
5445
5446 // Otherwise use a scalar load and splat. This will give the best
5447 // opportunity to fold a splat into the operation. ISel can turn it into
5448 // the x0 strided load if we aren't able to fold away the select.
5449 if (SVT.isFloatingPoint())
5450 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5451 Ld->getPointerInfo().getWithOffset(Offset),
5452 Ld->getOriginalAlign(),
5453 Ld->getMemOperand()->getFlags());
5454 else
5455 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5456 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5457 Ld->getOriginalAlign(),
5458 Ld->getMemOperand()->getFlags());
5459 DAG.makeEquivalentMemoryOrdering(Ld, V);
5460
5461 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5462 : RISCVISD::VMV_V_X_VL;
5463 SDValue Splat =
5464 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5465 Splat = DAG.getBitcast(ContainerVT, Splat);
5466 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5467 }
5468
5469 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5470 assert(Lane < (int)NumElts && "Unexpected lane!");
5471 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5472 V1, DAG.getConstant(Lane, DL, XLenVT),
5473 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5474 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5475 }
5476 }
5477
5478 // For exact VLEN m2 or greater, try to split to m1 operations if we
5479 // can split cleanly.
5480 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5481 return V;
5482
5483 ArrayRef<int> Mask = SVN->getMask();
5484
5485 if (SDValue V =
5486 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5487 return V;
5488
5489 if (SDValue V =
5490 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5491 return V;
5492
5493 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5494 // available.
5495 if (Subtarget.hasStdExtZvkb())
5496 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5497 return V;
5498
5499 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5500 // be undef which can be handled with a single SLIDEDOWN/UP.
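 // For example, the single-source v8 mask <2, 3, 4, 5, 6, 7, 0, 1> is a
 // rotation by 2: a vslidedown by 2 fills lanes 0-5 with elements 2-7, and a
 // vslideup by NumElts - 2 = 6 places elements 0-1 into lanes 6-7.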
5501 int LoSrc, HiSrc;
5502 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5503 if (Rotation > 0) {
5504 SDValue LoV, HiV;
5505 if (LoSrc >= 0) {
5506 LoV = LoSrc == 0 ? V1 : V2;
5507 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5508 }
5509 if (HiSrc >= 0) {
5510 HiV = HiSrc == 0 ? V1 : V2;
5511 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5512 }
5513
5514 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5515 // to slide LoV up by (NumElts - Rotation).
5516 unsigned InvRotate = NumElts - Rotation;
5517
5518 SDValue Res = DAG.getUNDEF(ContainerVT);
5519 if (HiV) {
5520 // Even though we could use a smaller VL, don't, to avoid a vsetivli
5521 // toggle.
5522 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5523 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5524 }
5525 if (LoV)
5526 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5527 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5528 RISCVII::TAIL_AGNOSTIC);
5529
5530 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5531 }
5532
5533 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5534 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5535
5536 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5537 // use shift and truncate to perform the shuffle.
5538 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5539 // shift-and-trunc reducing total cost for everything except an mf8 result.
5540 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5541 // to do the entire operation.
5542 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5543 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5544 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5545 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5546 unsigned Index = 0;
5547 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5548 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5549 if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5550 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5551 }
5552 }
5553 }
5554
5555 if (SDValue V =
5556 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5557 return V;
5558
5559 // Detect an interleave shuffle and lower to
5560 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
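 // This works because zext(Even) + zext(Odd) + zext(Odd) * (2^eltbits - 1)
 // equals zext(Even) + (zext(Odd) << eltbits), so each widened element holds
 // the even element in its low half and the odd element in its high half,
 // which is exactly the interleaved layout when viewed at the original
 // element size.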
5561 int EvenSrc, OddSrc;
5562 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5563 // Extract the halves of the vectors.
5564 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5565
5566 // Recognize if one half is actually undef; the matching above will
5567 // otherwise reuse the even stream for the undef one. This improves
5568 // spread(2) shuffles.
5569 bool LaneIsUndef[2] = { true, true};
5570 for (unsigned i = 0; i < Mask.size(); i++)
5571 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5572
5573 int Size = Mask.size();
5574 SDValue EvenV, OddV;
5575 if (LaneIsUndef[0]) {
5576 EvenV = DAG.getUNDEF(HalfVT);
5577 } else {
5578 assert(EvenSrc >= 0 && "Undef source?");
5579 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5580 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5581 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5582 }
5583
5584 if (LaneIsUndef[1]) {
5585 OddV = DAG.getUNDEF(HalfVT);
5586 } else {
5587 assert(OddSrc >= 0 && "Undef source?");
5588 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5589 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5590 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5591 }
5592
5593 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5594 }
5595
5596
5597 // Handle any remaining single source shuffles
5598 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5599 if (V2.isUndef()) {
5600 // We might be able to express the shuffle as a bitrotate. But even if we
5601 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5602 // shifts and a vor will have a higher throughput than a vrgather.
5603 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5604 return V;
5605
5606 // Before hitting generic lowering fallbacks, try to widen the mask
5607 // to a wider SEW.
5608 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5609 return V;
5610
5611 // Can we generate a vcompress instead of a vrgather? These scale better
5612 // at high LMUL, at the cost of not being able to fold a following select
5613 // into them. The mask constants are also smaller than the index vector
5614 // constants, and thus easier to materialize.
5615 if (isCompressMask(Mask)) {
5616 SmallVector<SDValue> MaskVals(NumElts,
5617 DAG.getConstant(false, DL, XLenVT));
5618 for (auto Idx : Mask) {
5619 if (Idx == -1)
5620 break;
5621 assert(Idx >= 0 && (unsigned)Idx < NumElts);
5622 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
5623 }
5624 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5625 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5626 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
5627 DAG.getUNDEF(VT));
5628 }
5629
5630 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
5631 // is fully covered in interleave(2) above, so it is ignored here.
5632 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5633 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5634 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5635 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5636 unsigned Index;
5637 if (isSpreadMask(Mask, Factor, Index)) {
5638 MVT NarrowVT =
5639 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5640 SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
5641 DAG.getVectorIdxConstant(0, DL));
5642 return getWideningSpread(Src, Factor, Index, DL, DAG);
5643 }
5644 }
5645 }
5646
5647 if (VT.getScalarSizeInBits() == 8 &&
5648 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5649 // On such a vector we're unable to use i8 as the index type.
5650 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5651 // may involve vector splitting if we're already at LMUL=8, or our
5652 // user-supplied maximum fixed-length LMUL.
5653 return SDValue();
5654 }
5655
5656 // Base case for the two operand recursion below - handle the worst case
5657 // single source shuffle.
5658 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5659 MVT IndexVT = VT.changeTypeToInteger();
5660 // Since we can't introduce illegal index types at this stage, use i16 and
5661 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5662 // than XLenVT.
5663 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5664 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5665 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5666 }
5667
5668 // If the mask allows, we can do all the index computation in 16 bits. This
5669 // requires less work and less register pressure at high LMUL, and creates
5670 // smaller constants which may be cheaper to materialize.
5671 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5672 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5673 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5674 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5675 }
5676
5677 MVT IndexContainerVT =
5678 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5679
5680 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5681 SmallVector<SDValue> GatherIndicesLHS;
5682 for (int MaskIndex : Mask) {
5683 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5684 GatherIndicesLHS.push_back(IsLHSIndex
5685 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5686 : DAG.getUNDEF(XLenVT));
5687 }
5688 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5689 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5690 Subtarget);
5691 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5692 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5693 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5694 }
5695
5696 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5697 // merged with a second vrgather.
5698 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5699
5700 // Now construct the mask that will be used by the blended vrgather operation.
5701 // Construct the appropriate indices into each vector.
5702 for (int MaskIndex : Mask) {
5703 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5704 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5705 ? MaskIndex : -1);
5706 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5707 }
5708
5709 // If the mask indices are disjoint between the two sources, we can lower it
5710 // as a vselect + a single source vrgather.vv. Don't do this if we think the
5711 // operands may end up being lowered to something cheaper than a vrgather.vv.
5712 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
5713 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS.data(), VT) &&
5714 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS.data(), VT) &&
5715 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
5716 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
5717 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
5718 return V;
5719
5720 // Before hitting generic lowering fallbacks, try to widen the mask
5721 // to a wider SEW.
5722 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5723 return V;
5724
5725 // Try to pick a profitable operand order.
5726 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5727 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5728
5729 // Recursively invoke lowering for each operand if we had two
5730 // independent single source shuffles, and then combine the result via a
5731 // vselect. Note that the vselect will likely be folded back into the
5732 // second permute (vrgather, or other) by the post-isel combine.
5733 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5734 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5735
5736 SmallVector<SDValue> MaskVals;
5737 for (int MaskIndex : Mask) {
5738 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5739 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5740 }
5741
5742 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5743 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5744 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5745
5746 if (SwapOps)
5747 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5748 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5749}
5750
5751bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5752 // Only support legal VTs for other shuffles for now.
5753 if (!isTypeLegal(VT))
5754 return false;
5755
5756 // Support splats for any type. These should type legalize well.
5757 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5758 return true;
5759
5760 MVT SVT = VT.getSimpleVT();
5761
5762 // Not for i1 vectors.
5763 if (SVT.getScalarType() == MVT::i1)
5764 return false;
5765
5766 int Dummy1, Dummy2;
5767 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5768 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5769}
5770
5771// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5772// the exponent.
5773SDValue
5774RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5775 SelectionDAG &DAG) const {
5776 MVT VT = Op.getSimpleValueType();
5777 unsigned EltSize = VT.getScalarSizeInBits();
5778 SDValue Src = Op.getOperand(0);
5779 SDLoc DL(Op);
5780 MVT ContainerVT = VT;
5781
5782 SDValue Mask, VL;
5783 if (Op->isVPOpcode()) {
5784 Mask = Op.getOperand(1);
5785 if (VT.isFixedLengthVector())
5786 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5787 Subtarget);
5788 VL = Op.getOperand(2);
5789 }
5790
5791 // We choose FP type that can represent the value if possible. Otherwise, we
5792 // use rounding to zero conversion for correct exponent of the result.
5793 // TODO: Use f16 for i8 when possible?
5794 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5795 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5796 FloatEltVT = MVT::f32;
5797 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5798
5799 // Legal types should have been checked in the RISCVTargetLowering
5800 // constructor.
5801 // TODO: Splitting may make sense in some cases.
5802 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5803 "Expected legal float type!");
5804
5805 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5806 // The trailing zero count is equal to log2 of this single bit value.
5807 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5808 SDValue Neg = DAG.getNegative(Src, DL, VT);
5809 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5810 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5811 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5812 Src, Mask, VL);
5813 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5814 }
5815
5816 // We have a legal FP type, convert to it.
5817 SDValue FloatVal;
5818 if (FloatVT.bitsGT(VT)) {
5819 if (Op->isVPOpcode())
5820 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5821 else
5822 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5823 } else {
5824 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5825 if (VT.isFixedLengthVector()) {
5826 ContainerVT = getContainerForFixedLengthVector(VT);
5827 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5828 }
5829 if (!Op->isVPOpcode())
5830 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5831 SDValue RTZRM =
5832 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5833 MVT ContainerFloatVT =
5834 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5835 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5836 Src, Mask, RTZRM, VL);
5837 if (VT.isFixedLengthVector())
5838 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5839 }
5840 // Bitcast to integer and shift the exponent to the LSB.
5841 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5842 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5843 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5844
5845 SDValue Exp;
5846 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5847 if (Op->isVPOpcode()) {
5848 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5849 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5850 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5851 } else {
5852 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5853 DAG.getConstant(ShiftAmt, DL, IntVT));
5854 if (IntVT.bitsLT(VT))
5855 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5856 else if (IntVT.bitsGT(VT))
5857 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5858 }
5859
5860 // The exponent contains log2 of the value in biased form.
5861 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5862 // For trailing zeros, we just need to subtract the bias.
5863 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5864 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5865 DAG.getConstant(ExponentBias, DL, VT));
5866 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5867 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5868 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5869
5870 // For leading zeros, we need to remove the bias and convert from log2 to
5871 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5872 unsigned Adjust = ExponentBias + (EltSize - 1);
5873 SDValue Res;
5874 if (Op->isVPOpcode())
5875 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5876 Mask, VL);
5877 else
5878 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5879
5880 // The above result with zero input equals Adjust, which is greater than
5881 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
5882 if (Op.getOpcode() == ISD::CTLZ)
5883 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5884 else if (Op.getOpcode() == ISD::VP_CTLZ)
5885 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5886 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5887 return Res;
5888}
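// Worked example for a 32-bit element holding 8: CTTZ first isolates the low
// set bit (8 & -8 == 8), uitofp gives a biased f32 exponent field of 130, and
// 130 - 127 == 3 == cttz(8). For CTLZ the same exponent is subtracted from
// 127 + 31, giving 158 - 130 == 28 == ctlz(8).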
5889
5890SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5891 SelectionDAG &DAG) const {
5892 SDLoc DL(Op);
5893 MVT XLenVT = Subtarget.getXLenVT();
5894 SDValue Source = Op->getOperand(0);
5895 MVT SrcVT = Source.getSimpleValueType();
5896 SDValue Mask = Op->getOperand(1);
5897 SDValue EVL = Op->getOperand(2);
5898
5899 if (SrcVT.isFixedLengthVector()) {
5900 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5901 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5902 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5903 Subtarget);
5904 SrcVT = ContainerVT;
5905 }
5906
5907 // Convert to boolean vector.
5908 if (SrcVT.getScalarType() != MVT::i1) {
5909 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5910 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5911 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5912 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5913 DAG.getUNDEF(SrcVT), Mask, EVL});
5914 }
5915
5916 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5917 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5918 // In this case, we can interpret poison as -1, so nothing to do further.
5919 return Res;
5920
5921 // Convert -1 to VL.
5922 SDValue SetCC =
5923 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5924 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5925 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5926}
5927
5928// While RVV has alignment restrictions, we should always be able to load as a
5929// legal equivalently-sized byte-typed vector instead. This method is
5930// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
5931// the load is already correctly-aligned, it returns SDValue().
5932SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5933 SelectionDAG &DAG) const {
5934 auto *Load = cast<LoadSDNode>(Op);
5935 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5936
5937 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5938 Load->getMemoryVT(),
5939 *Load->getMemOperand()))
5940 return SDValue();
5941
5942 SDLoc DL(Op);
5943 MVT VT = Op.getSimpleValueType();
5944 unsigned EltSizeBits = VT.getScalarSizeInBits();
5945 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5946 "Unexpected unaligned RVV load type");
5947 MVT NewVT =
5948 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5949 assert(NewVT.isValid() &&
5950 "Expecting equally-sized RVV vector types to be legal");
5951 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5952 Load->getPointerInfo(), Load->getOriginalAlign(),
5953 Load->getMemOperand()->getFlags());
5954 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5955}
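// For example, a v8i16 load that is only byte-aligned is re-expressed here as
// a v16i8 load (bytes have no alignment requirement) followed by a bitcast
// back to v8i16.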
5956
5957// While RVV has alignment restrictions, we should always be able to store as a
5958// legal equivalently-sized byte-typed vector instead. This method is
5959// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
5960// returns SDValue() if the store is already correctly aligned.
5961SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5962 SelectionDAG &DAG) const {
5963 auto *Store = cast<StoreSDNode>(Op);
5964 assert(Store && Store->getValue().getValueType().isVector() &&
5965 "Expected vector store");
5966
5967 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5968 Store->getMemoryVT(),
5969 *Store->getMemOperand()))
5970 return SDValue();
5971
5972 SDLoc DL(Op);
5973 SDValue StoredVal = Store->getValue();
5974 MVT VT = StoredVal.getSimpleValueType();
5975 unsigned EltSizeBits = VT.getScalarSizeInBits();
5976 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5977 "Unexpected unaligned RVV store type");
5978 MVT NewVT =
5979 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5980 assert(NewVT.isValid() &&
5981 "Expecting equally-sized RVV vector types to be legal");
5982 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5983 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5984 Store->getPointerInfo(), Store->getOriginalAlign(),
5985 Store->getMemOperand()->getFlags());
5986}
5987
5988static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5989 const RISCVSubtarget &Subtarget) {
5990 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5991
5992 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5993
5994 // All simm32 constants should be handled by isel.
5995 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5996 // this check redundant, but small immediates are common so this check
5997 // should have better compile time.
5998 if (isInt<32>(Imm))
5999 return Op;
6000
6001 // We only need to cost the immediate, if constant pool lowering is enabled.
6002 if (!Subtarget.useConstantPoolForLargeInts())
6003 return Op;
6004
6005 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
6006 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6007 return Op;
6008
6009 // Optimizations below are disabled for opt size. If we're optimizing for
6010 // size, use a constant pool.
6011 if (DAG.shouldOptForSize())
6012 return SDValue();
6013
6014 // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
6015 // that if it will avoid a constant pool.
6016 // It will require an extra temporary register though.
6017 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6018 // low and high 32 bits are the same and bit 31 and 63 are set.
6019 unsigned ShiftAmt, AddOpc;
6020 RISCVMatInt::InstSeq SeqLo =
6021 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6022 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6023 return Op;
6024
6025 return SDValue();
6026}
6027
6028SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6029 SelectionDAG &DAG) const {
6030 MVT VT = Op.getSimpleValueType();
6031 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6032
6033 // Can this constant be selected by a Zfa FLI instruction?
6034 bool Negate = false;
6035 int Index = getLegalZfaFPImm(Imm, VT);
6036
6037 // If the constant is negative, try negating.
6038 if (Index < 0 && Imm.isNegative()) {
6039 Index = getLegalZfaFPImm(-Imm, VT);
6040 Negate = true;
6041 }
6042
6043 // If we couldn't find a FLI lowering, fall back to generic code.
6044 if (Index < 0)
6045 return SDValue();
6046
6047 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6048 SDLoc DL(Op);
6049 SDValue Const =
6050 DAG.getNode(RISCVISD::FLI, DL, VT,
6051 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6052 if (!Negate)
6053 return Const;
6054
6055 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6056}
6057
6058static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6059 const RISCVSubtarget &Subtarget) {
6060 SDLoc dl(Op);
6061 AtomicOrdering FenceOrdering =
6062 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6063 SyncScope::ID FenceSSID =
6064 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6065
6066 if (Subtarget.hasStdExtZtso()) {
6067 // The only fence that needs an instruction is a sequentially-consistent
6068 // cross-thread fence.
6069 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6070 FenceSSID == SyncScope::System)
6071 return Op;
6072
6073 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6074 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6075 }
6076
6077 // singlethread fences only synchronize with signal handlers on the same
6078 // thread and thus only need to preserve instruction order, not actually
6079 // enforce memory ordering.
6080 if (FenceSSID == SyncScope::SingleThread)
6081 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6082 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6083
6084 return Op;
6085}
6086
6087SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6088 SelectionDAG &DAG) const {
6089 SDLoc DL(Op);
6090 MVT VT = Op.getSimpleValueType();
6091 MVT XLenVT = Subtarget.getXLenVT();
6092 unsigned Check = Op.getConstantOperandVal(1);
6093 unsigned TDCMask = 0;
6094 if (Check & fcSNan)
6095 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6096 if (Check & fcQNan)
6097 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6098 if (Check & fcPosInf)
6099 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6100 if (Check & fcNegInf)
6101 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6102 if (Check & fcPosNormal)
6103 TDCMask |= RISCV::FPMASK_Positive_Normal;
6104 if (Check & fcNegNormal)
6105 TDCMask |= RISCV::FPMASK_Negative_Normal;
6106 if (Check & fcPosSubnormal)
6107 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6108 if (Check & fcNegSubnormal)
6109 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6110 if (Check & fcPosZero)
6111 TDCMask |= RISCV::FPMASK_Positive_Zero;
6112 if (Check & fcNegZero)
6113 TDCMask |= RISCV::FPMASK_Negative_Zero;
6114
6115 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6116
6117 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6118
6119 if (VT.isVector()) {
6120 SDValue Op0 = Op.getOperand(0);
6121 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6122
6123 if (VT.isScalableVector()) {
6124 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6125 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6126 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6127 Mask = Op.getOperand(2);
6128 VL = Op.getOperand(3);
6129 }
6130 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6131 VL, Op->getFlags());
6132 if (IsOneBitMask)
6133 return DAG.getSetCC(DL, VT, FPCLASS,
6134 DAG.getConstant(TDCMask, DL, DstVT),
6135 ISD::SETEQ);
6136 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6137 DAG.getConstant(TDCMask, DL, DstVT));
6138 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6139 ISD::SETNE);
6140 }
6141
6142 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6143 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6144 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6145 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6146 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6147 Mask = Op.getOperand(2);
6148 MVT MaskContainerVT =
6149 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6150 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6151 VL = Op.getOperand(3);
6152 }
6153 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6154
6155 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6156 Mask, VL, Op->getFlags());
6157
6158 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6159 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6160 if (IsOneBitMask) {
6161 SDValue VMSEQ =
6162 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6163 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6164 DAG.getUNDEF(ContainerVT), Mask, VL});
6165 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6166 }
6167 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6168 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6169
6170 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6171 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6172 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6173
6174 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6175 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6176 DAG.getUNDEF(ContainerVT), Mask, VL});
6177 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6178 }
6179
6180 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6181 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6182 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6183 ISD::SETNE);
6184 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6185}
6186
6187// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6188// operations propagate nans.
6189static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6190 const RISCVSubtarget &Subtarget) {
6191 SDLoc DL(Op);
6192 MVT VT = Op.getSimpleValueType();
6193
6194 SDValue X = Op.getOperand(0);
6195 SDValue Y = Op.getOperand(1);
6196
6197 if (!VT.isVector()) {
6198 MVT XLenVT = Subtarget.getXLenVT();
6199
6200 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6201 // ensures that when one input is a nan, the other will also be a nan
6202 // allowing the nan to propagate. If both inputs are nan, this will swap the
6203 // inputs which is harmless.
6204
6205 SDValue NewY = Y;
6206 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6207 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6208 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6209 }
6210
6211 SDValue NewX = X;
6212 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6213 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6214 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6215 }
6216
6217 unsigned Opc =
6218 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6219 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6220 }
6221
6222 // Check for NaNs before converting the fixed-length vectors to scalable.
6223 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6224 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6225
6226 MVT ContainerVT = VT;
6227 if (VT.isFixedLengthVector()) {
6228 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6229 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6230 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6231 }
6232
6233 SDValue Mask, VL;
6234 if (Op->isVPOpcode()) {
6235 Mask = Op.getOperand(2);
6236 if (VT.isFixedLengthVector())
6237 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6238 Subtarget);
6239 VL = Op.getOperand(3);
6240 } else {
6241 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6242 }
6243
6244 SDValue NewY = Y;
6245 if (!XIsNeverNan) {
6246 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6247 {X, X, DAG.getCondCode(ISD::SETOEQ),
6248 DAG.getUNDEF(ContainerVT), Mask, VL});
6249 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6250 DAG.getUNDEF(ContainerVT), VL);
6251 }
6252
6253 SDValue NewX = X;
6254 if (!YIsNeverNan) {
6255 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6256 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6257 DAG.getUNDEF(ContainerVT), Mask, VL});
6258 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6259 DAG.getUNDEF(ContainerVT), VL);
6260 }
6261
6262 unsigned Opc =
6263 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6264 ? RISCVISD::VFMAX_VL
6265 : RISCVISD::VFMIN_VL;
6266 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6267 DAG.getUNDEF(ContainerVT), Mask, VL);
6268 if (VT.isFixedLengthVector())
6269 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6270 return Res;
6271}
6272
6273static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6274 const RISCVSubtarget &Subtarget) {
6275 bool IsFABS = Op.getOpcode() == ISD::FABS;
6276 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6277 "Wrong opcode for lowering FABS or FNEG.");
6278
6279 MVT XLenVT = Subtarget.getXLenVT();
6280 MVT VT = Op.getSimpleValueType();
6281 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6282
6283 SDLoc DL(Op);
6284 SDValue Fmv =
6285 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6286
6287 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6288 Mask = Mask.sext(Subtarget.getXLen());
6289
6290 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6291 SDValue Logic =
6292 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6293 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6294}
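// The masks above are the usual IEEE bit tricks: for f16/bf16, FABS clears
// the sign bit by ANDing with 0x7fff and FNEG flips it by XORing with 0x8000
// (sign-extended to XLen); fmv.h.x then only consumes the low 16 bits.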
6295
6296static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
6297 const RISCVSubtarget &Subtarget) {
6298 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6299
6300 MVT XLenVT = Subtarget.getXLenVT();
6301 MVT VT = Op.getSimpleValueType();
6302 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6303
6304 SDValue Mag = Op.getOperand(0);
6305 SDValue Sign = Op.getOperand(1);
6306
6307 SDLoc DL(Op);
6308
6309 // Get sign bit into an integer value.
6310 SDValue SignAsInt;
6311 unsigned SignSize = Sign.getValueSizeInBits();
6312 if (SignSize == Subtarget.getXLen()) {
6313 SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
6314 } else if (SignSize == 16) {
6315 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
6316 } else if (SignSize == 32) {
6317 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
6318 } else if (SignSize == 64) {
6319 assert(XLenVT == MVT::i32 && "Unexpected type");
6320 // Copy the upper word to integer.
6321 SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
6322 .getValue(1);
6323 SignSize = 32;
6324 } else
6325 llvm_unreachable("Unexpected sign size");
6326
6327 // Get the signbit at the right position for MagAsInt.
6328 int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
6329 if (ShiftAmount > 0) {
6330 SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
6331 DAG.getConstant(ShiftAmount, DL, XLenVT));
6332 } else if (ShiftAmount < 0) {
6333 SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
6334 DAG.getConstant(-ShiftAmount, DL, XLenVT));
6335 }
6336
6337 // Mask the sign bit and any bits above it. The extra bits will be dropped
6338 // when we convert back to FP.
6339 SDValue SignMask = DAG.getConstant(
6340 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6341 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6342
6343 // Transform Mag value to integer, and clear the sign bit.
6344 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6345 SDValue ClearSignMask = DAG.getConstant(
6346 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6347 SDValue ClearedSign =
6348 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6349
6350 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
6351 SDNodeFlags::Disjoint);
6352
6353 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6354}
6355
6356/// Get a RISC-V target specified VL op for a given SDNode.
6357static unsigned getRISCVVLOp(SDValue Op) {
6358#define OP_CASE(NODE) \
6359 case ISD::NODE: \
6360 return RISCVISD::NODE##_VL;
6361#define VP_CASE(NODE) \
6362 case ISD::VP_##NODE: \
6363 return RISCVISD::NODE##_VL;
6364 // clang-format off
6365 switch (Op.getOpcode()) {
6366 default:
6367 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
6368 OP_CASE(ADD)
6369 OP_CASE(SUB)
6370 OP_CASE(MUL)
6371 OP_CASE(MULHS)
6372 OP_CASE(MULHU)
6373 OP_CASE(SDIV)
6374 OP_CASE(SREM)
6375 OP_CASE(UDIV)
6376 OP_CASE(UREM)
6377 OP_CASE(SHL)
6378 OP_CASE(SRA)
6379 OP_CASE(SRL)
6380 OP_CASE(ROTL)
6381 OP_CASE(ROTR)
6382 OP_CASE(BSWAP)
6383 OP_CASE(CTTZ)
6384 OP_CASE(CTLZ)
6385 OP_CASE(CTPOP)
6386 OP_CASE(BITREVERSE)
6387 OP_CASE(SADDSAT)
6388 OP_CASE(UADDSAT)
6389 OP_CASE(SSUBSAT)
6390 OP_CASE(USUBSAT)
6391 OP_CASE(AVGFLOORS)
6392 OP_CASE(AVGFLOORU)
6393 OP_CASE(AVGCEILS)
6394 OP_CASE(AVGCEILU)
6395 OP_CASE(FADD)
6396 OP_CASE(FSUB)
6397 OP_CASE(FMUL)
6398 OP_CASE(FDIV)
6399 OP_CASE(FNEG)
6400 OP_CASE(FABS)
6401 OP_CASE(FSQRT)
6402 OP_CASE(SMIN)
6403 OP_CASE(SMAX)
6404 OP_CASE(UMIN)
6405 OP_CASE(UMAX)
6406 OP_CASE(STRICT_FADD)
6407 OP_CASE(STRICT_FSUB)
6408 OP_CASE(STRICT_FMUL)
6409 OP_CASE(STRICT_FDIV)
6410 OP_CASE(STRICT_FSQRT)
6411 VP_CASE(ADD) // VP_ADD
6412 VP_CASE(SUB) // VP_SUB
6413 VP_CASE(MUL) // VP_MUL
6414 VP_CASE(SDIV) // VP_SDIV
6415 VP_CASE(SREM) // VP_SREM
6416 VP_CASE(UDIV) // VP_UDIV
6417 VP_CASE(UREM) // VP_UREM
6418 VP_CASE(SHL) // VP_SHL
6419 VP_CASE(FADD) // VP_FADD
6420 VP_CASE(FSUB) // VP_FSUB
6421 VP_CASE(FMUL) // VP_FMUL
6422 VP_CASE(FDIV) // VP_FDIV
6423 VP_CASE(FNEG) // VP_FNEG
6424 VP_CASE(FABS) // VP_FABS
6425 VP_CASE(SMIN) // VP_SMIN
6426 VP_CASE(SMAX) // VP_SMAX
6427 VP_CASE(UMIN) // VP_UMIN
6428 VP_CASE(UMAX) // VP_UMAX
6429 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
6430 VP_CASE(SETCC) // VP_SETCC
6431 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6432 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6433 VP_CASE(BITREVERSE) // VP_BITREVERSE
6434 VP_CASE(SADDSAT) // VP_SADDSAT
6435 VP_CASE(UADDSAT) // VP_UADDSAT
6436 VP_CASE(SSUBSAT) // VP_SSUBSAT
6437 VP_CASE(USUBSAT) // VP_USUBSAT
6438 VP_CASE(BSWAP) // VP_BSWAP
6439 VP_CASE(CTLZ) // VP_CTLZ
6440 VP_CASE(CTTZ) // VP_CTTZ
6441 VP_CASE(CTPOP) // VP_CTPOP
6442 case ISD::CTLZ_ZERO_UNDEF:
6443 case ISD::VP_CTLZ_ZERO_UNDEF:
6444 return RISCVISD::CTLZ_VL;
6445 case ISD::CTTZ_ZERO_UNDEF:
6446 case ISD::VP_CTTZ_ZERO_UNDEF:
6447 return RISCVISD::CTTZ_VL;
6448 case ISD::FMA:
6449 case ISD::VP_FMA:
6450 return RISCVISD::VFMADD_VL;
6451 case ISD::STRICT_FMA:
6452 return RISCVISD::STRICT_VFMADD_VL;
6453 case ISD::AND:
6454 case ISD::VP_AND:
6455 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6456 return RISCVISD::VMAND_VL;
6457 return RISCVISD::AND_VL;
6458 case ISD::OR:
6459 case ISD::VP_OR:
6460 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6461 return RISCVISD::VMOR_VL;
6462 return RISCVISD::OR_VL;
6463 case ISD::XOR:
6464 case ISD::VP_XOR:
6465 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6466 return RISCVISD::VMXOR_VL;
6467 return RISCVISD::XOR_VL;
6468 case ISD::VP_SELECT:
6469 case ISD::VP_MERGE:
6470 return RISCVISD::VMERGE_VL;
6471 case ISD::VP_SRA:
6472 return RISCVISD::SRA_VL;
6473 case ISD::VP_SRL:
6474 return RISCVISD::SRL_VL;
6475 case ISD::VP_SQRT:
6476 return RISCVISD::FSQRT_VL;
6477 case ISD::VP_SIGN_EXTEND:
6478 return RISCVISD::VSEXT_VL;
6479 case ISD::VP_ZERO_EXTEND:
6480 return RISCVISD::VZEXT_VL;
6481 case ISD::VP_FP_TO_SINT:
6482 return RISCVISD::VFCVT_RTZ_X_F_VL;
6483 case ISD::VP_FP_TO_UINT:
6484 return RISCVISD::VFCVT_RTZ_XU_F_VL;
6485 case ISD::FMINNUM:
6486 case ISD::VP_FMINNUM:
6487 return RISCVISD::VFMIN_VL;
6488 case ISD::FMAXNUM:
6489 case ISD::VP_FMAXNUM:
6490 return RISCVISD::VFMAX_VL;
6491 case ISD::LRINT:
6492 case ISD::VP_LRINT:
6493 case ISD::LLRINT:
6494 case ISD::VP_LLRINT:
6496 }
6497 // clang-format on
6498#undef OP_CASE
6499#undef VP_CASE
6500}
6501
6502/// Return true if a RISC-V target specified op has a passthru operand.
6503static bool hasPassthruOp(unsigned Opcode) {
6504 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6506 "not a RISC-V target specific op");
6507 static_assert(
6510 "adding target specific op should update this function");
6511 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6512 return true;
6513 if (Opcode == RISCVISD::FCOPYSIGN_VL)
6514 return true;
6515 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6516 return true;
6517 if (Opcode == RISCVISD::SETCC_VL)
6518 return true;
6519 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6520 return true;
6521 if (Opcode == RISCVISD::VMERGE_VL)
6522 return true;
6523 return false;
6524}
6525
6526/// Return true if a RISC-V target specified op has a mask operand.
6527static bool hasMaskOp(unsigned Opcode) {
6528 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6530 "not a RISC-V target specific op");
6531 static_assert(
6534 "adding target specific op should update this function");
6535 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6536 return true;
6537 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6538 return true;
6539 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6541 return true;
6542 return false;
6543}
6544
6546 const RISCVSubtarget &Subtarget) {
6547 if (Op.getValueType() == MVT::nxv32f16 &&
6548 (Subtarget.hasVInstructionsF16Minimal() &&
6549 !Subtarget.hasVInstructionsF16()))
6550 return true;
6551 if (Op.getValueType() == MVT::nxv32bf16)
6552 return true;
6553 return false;
6554}
6555
6556static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
6557 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6558 SDLoc DL(Op);
6559
6560 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6561 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6562
6563 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6564 if (!Op.getOperand(j).getValueType().isVector()) {
6565 LoOperands[j] = Op.getOperand(j);
6566 HiOperands[j] = Op.getOperand(j);
6567 continue;
6568 }
6569 std::tie(LoOperands[j], HiOperands[j]) =
6570 DAG.SplitVector(Op.getOperand(j), DL);
6571 }
6572
6573 SDValue LoRes =
6574 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6575 SDValue HiRes =
6576 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6577
6578 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6579}
6580
6581static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
6582 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6583 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6584 SDLoc DL(Op);
6585
6586 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6587 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6588
6589 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6590 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6591 std::tie(LoOperands[j], HiOperands[j]) =
6592 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6593 continue;
6594 }
6595 if (!Op.getOperand(j).getValueType().isVector()) {
6596 LoOperands[j] = Op.getOperand(j);
6597 HiOperands[j] = Op.getOperand(j);
6598 continue;
6599 }
6600 std::tie(LoOperands[j], HiOperands[j]) =
6601 DAG.SplitVector(Op.getOperand(j), DL);
6602 }
6603
6604 SDValue LoRes =
6605 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6606 SDValue HiRes =
6607 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6608
6609 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6610}
6611
6611static SDValue SplitVPReductionOp(SDValue Op, SelectionDAG &DAG) {
6612 SDLoc DL(Op);
6614
6615 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6616 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6617 auto [EVLLo, EVLHi] =
6618 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6619
6620 SDValue ResLo =
6621 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6622 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6623 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6624 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6625}
6626
6627static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
6628
6629 assert(Op->isStrictFPOpcode());
6630
6631 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6632
6633 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6634 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6635
6636 SDLoc DL(Op);
6637
6638 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6639 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6640
6641 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6642 if (!Op.getOperand(j).getValueType().isVector()) {
6643 LoOperands[j] = Op.getOperand(j);
6644 HiOperands[j] = Op.getOperand(j);
6645 continue;
6646 }
6647 std::tie(LoOperands[j], HiOperands[j]) =
6648 DAG.SplitVector(Op.getOperand(j), DL);
6649 }
6650
6651 SDValue LoRes =
6652 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6653 HiOperands[0] = LoRes.getValue(1);
6654 SDValue HiRes =
6655 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6656
6657 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6658 LoRes.getValue(0), HiRes.getValue(0));
6659 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6660}
6661
6662SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6663 SelectionDAG &DAG) const {
6664 switch (Op.getOpcode()) {
6665 default:
6666 report_fatal_error("unimplemented operand");
6667 case ISD::ATOMIC_FENCE:
6668 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6669 case ISD::GlobalAddress:
6670 return lowerGlobalAddress(Op, DAG);
6671 case ISD::BlockAddress:
6672 return lowerBlockAddress(Op, DAG);
6673 case ISD::ConstantPool:
6674 return lowerConstantPool(Op, DAG);
6675 case ISD::JumpTable:
6676 return lowerJumpTable(Op, DAG);
6677 case ISD::GlobalTLSAddress:
6678 return lowerGlobalTLSAddress(Op, DAG);
6679 case ISD::Constant:
6680 return lowerConstant(Op, DAG, Subtarget);
6681 case ISD::ConstantFP:
6682 return lowerConstantFP(Op, DAG);
6683 case ISD::SELECT:
6684 return lowerSELECT(Op, DAG);
6685 case ISD::BRCOND:
6686 return lowerBRCOND(Op, DAG);
6687 case ISD::VASTART:
6688 return lowerVASTART(Op, DAG);
6689 case ISD::FRAMEADDR:
6690 return lowerFRAMEADDR(Op, DAG);
6691 case ISD::RETURNADDR:
6692 return lowerRETURNADDR(Op, DAG);
6693 case ISD::SHL_PARTS:
6694 return lowerShiftLeftParts(Op, DAG);
6695 case ISD::SRA_PARTS:
6696 return lowerShiftRightParts(Op, DAG, true);
6697 case ISD::SRL_PARTS:
6698 return lowerShiftRightParts(Op, DAG, false);
6699 case ISD::ROTL:
6700 case ISD::ROTR:
6701 if (Op.getValueType().isFixedLengthVector()) {
6702 assert(Subtarget.hasStdExtZvkb());
6703 return lowerToScalableOp(Op, DAG);
6704 }
6705 assert(Subtarget.hasVendorXTHeadBb() &&
6706 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6707 "Unexpected custom legalization");
6708 // XTHeadBb only supports rotate by constant.
6709 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6710 return SDValue();
6711 return Op;
6712 case ISD::BITCAST: {
6713 SDLoc DL(Op);
6714 EVT VT = Op.getValueType();
6715 SDValue Op0 = Op.getOperand(0);
6716 EVT Op0VT = Op0.getValueType();
6717 MVT XLenVT = Subtarget.getXLenVT();
6718 if (Op0VT == MVT::i16 &&
6719 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6720 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6721 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6722 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
6723 }
6724 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6725 Subtarget.hasStdExtFOrZfinx()) {
6726 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6727 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6728 }
6729 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
6730 Subtarget.hasStdExtDOrZdinx()) {
6731 SDValue Lo, Hi;
6732 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6733 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6734 }
6735
6736 // Consider other scalar<->scalar casts as legal if the types are legal.
6737 // Otherwise expand them.
6738 if (!VT.isVector() && !Op0VT.isVector()) {
6739 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6740 return Op;
6741 return SDValue();
6742 }
6743
6744 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6745 "Unexpected types");
6746
6747 if (VT.isFixedLengthVector()) {
6748 // We can handle fixed length vector bitcasts with a simple replacement
6749 // in isel.
6750 if (Op0VT.isFixedLengthVector())
6751 return Op;
6752 // When bitcasting from scalar to fixed-length vector, insert the scalar
6753 // into a one-element vector of the result type, and perform a vector
6754 // bitcast.
6755 if (!Op0VT.isVector()) {
6756 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6757 if (!isTypeLegal(BVT))
6758 return SDValue();
6759 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6760 DAG.getUNDEF(BVT), Op0,
6761 DAG.getVectorIdxConstant(0, DL)));
6762 }
6763 return SDValue();
6764 }
6765 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6766 // thus: bitcast the vector to a one-element vector type whose element type
6767 // is the same as the result type, and extract the first element.
6768 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6769 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6770 if (!isTypeLegal(BVT))
6771 return SDValue();
6772 SDValue BVec = DAG.getBitcast(BVT, Op0);
6773 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6774 DAG.getVectorIdxConstant(0, DL));
6775 }
6776 return SDValue();
6777 }
6778 case ISD::INTRINSIC_WO_CHAIN:
6779 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6780 case ISD::INTRINSIC_W_CHAIN:
6781 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6782 case ISD::INTRINSIC_VOID:
6783 return LowerINTRINSIC_VOID(Op, DAG);
6784 case ISD::IS_FPCLASS:
6785 return LowerIS_FPCLASS(Op, DAG);
6786 case ISD::BITREVERSE: {
6787 MVT VT = Op.getSimpleValueType();
6788 if (VT.isFixedLengthVector()) {
6789 assert(Subtarget.hasStdExtZvbb());
6790 return lowerToScalableOp(Op, DAG);
6791 }
6792 SDLoc DL(Op);
6793 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6794 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6795 // Expand bitreverse to a bswap(rev8) followed by brev8.
6796 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6797 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
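// For example, for the 32-bit value 0x000000F0, rev8 (bswap) produces
// 0xF0000000 and brev8 then reverses the bits within each byte, producing
// 0x0F000000, which is the full 32-bit bitreverse of the input.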
6798 }
6799 case ISD::TRUNCATE:
6800 case ISD::TRUNCATE_SSAT_S:
6801 case ISD::TRUNCATE_USAT_U:
6802 // Only custom-lower vector truncates
6803 if (!Op.getSimpleValueType().isVector())
6804 return Op;
6805 return lowerVectorTruncLike(Op, DAG);
6806 case ISD::ANY_EXTEND:
6807 case ISD::ZERO_EXTEND:
6808 if (Op.getOperand(0).getValueType().isVector() &&
6809 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6810 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6811 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6812 case ISD::SIGN_EXTEND:
6813 if (Op.getOperand(0).getValueType().isVector() &&
6814 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6815 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6816 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6817 case ISD::SPLAT_VECTOR_PARTS:
6818 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6819 case ISD::INSERT_VECTOR_ELT:
6820 return lowerINSERT_VECTOR_ELT(Op, DAG);
6821 case ISD::EXTRACT_VECTOR_ELT:
6822 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6823 case ISD::SCALAR_TO_VECTOR: {
6824 MVT VT = Op.getSimpleValueType();
6825 SDLoc DL(Op);
6826 SDValue Scalar = Op.getOperand(0);
6827 if (VT.getVectorElementType() == MVT::i1) {
6828 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6829 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6830 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6831 }
6832 MVT ContainerVT = VT;
6833 if (VT.isFixedLengthVector())
6834 ContainerVT = getContainerForFixedLengthVector(VT);
6835 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6836
6837 SDValue V;
6838 if (VT.isFloatingPoint()) {
6839 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
6840 DAG.getUNDEF(ContainerVT), Scalar, VL);
6841 } else {
6842 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6843 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6844 DAG.getUNDEF(ContainerVT), Scalar, VL);
6845 }
6846 if (VT.isFixedLengthVector())
6847 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6848 return V;
6849 }
6850 case ISD::VSCALE: {
6851 MVT XLenVT = Subtarget.getXLenVT();
6852 MVT VT = Op.getSimpleValueType();
6853 SDLoc DL(Op);
6854 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6855 // We define our scalable vector types for lmul=1 to use a 64 bit known
6856 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6857 // vscale as VLENB / 8.
6858 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6859 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6860 report_fatal_error("Support for VLEN==32 is incomplete.");
6861 // We assume VLENB is a multiple of 8. We manually choose the best shift
6862 // here because SimplifyDemandedBits isn't always able to simplify it.
6863 uint64_t Val = Op.getConstantOperandVal(0);
6864 if (isPowerOf2_64(Val)) {
6865 uint64_t Log2 = Log2_64(Val);
6866 if (Log2 < 3)
6867 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6868 DAG.getConstant(3 - Log2, DL, VT));
6869 else if (Log2 > 3)
6870 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6871 DAG.getConstant(Log2 - 3, DL, XLenVT));
6872 } else if ((Val % 8) == 0) {
6873 // If the multiplier is a multiple of 8, scale it down to avoid needing
6874 // to shift the VLENB value.
6875 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6876 DAG.getConstant(Val / 8, DL, XLenVT));
6877 } else {
6878 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6879 DAG.getConstant(3, DL, XLenVT));
6880 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6881 DAG.getConstant(Val, DL, XLenVT));
6882 }
6883 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
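// For example, on RV64 (vscale * 16) is expected to lower to roughly
//   csrr a0, vlenb
//   slli a0, a0, 1
// since vscale = VLENB / 8 and 16 / 8 == 2; (vscale * 4) would instead shift
// right by one.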
6884 }
6885 case ISD::FPOWI: {
6886 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6887 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6888 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6889 Op.getOperand(1).getValueType() == MVT::i32) {
6890 SDLoc DL(Op);
6891 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6892 SDValue Powi =
6893 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6894 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6895 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6896 }
6897 return SDValue();
6898 }
6899 case ISD::FMAXIMUM:
6900 case ISD::FMINIMUM:
6901 if (isPromotedOpNeedingSplit(Op, Subtarget))
6902 return SplitVectorOp(Op, DAG);
6903 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6904 case ISD::FP_EXTEND:
6905 case ISD::FP_ROUND:
6906 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6907 case ISD::STRICT_FP_ROUND:
6908 case ISD::STRICT_FP_EXTEND:
6909 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6910 case ISD::SINT_TO_FP:
6911 case ISD::UINT_TO_FP:
6912 if (Op.getValueType().isVector() &&
6913 ((Op.getValueType().getScalarType() == MVT::f16 &&
6914 (Subtarget.hasVInstructionsF16Minimal() &&
6915 !Subtarget.hasVInstructionsF16())) ||
6916 Op.getValueType().getScalarType() == MVT::bf16)) {
6917 if (isPromotedOpNeedingSplit(Op, Subtarget))
6918 return SplitVectorOp(Op, DAG);
6919 // int -> f32
6920 SDLoc DL(Op);
6921 MVT NVT =
6922 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6923 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6924 // f32 -> [b]f16
6925 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6926 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6927 }
6928 [[fallthrough]];
6929 case ISD::FP_TO_SINT:
6930 case ISD::FP_TO_UINT:
6931 if (SDValue Op1 = Op.getOperand(0);
6932 Op1.getValueType().isVector() &&
6933 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6934 (Subtarget.hasVInstructionsF16Minimal() &&
6935 !Subtarget.hasVInstructionsF16())) ||
6936 Op1.getValueType().getScalarType() == MVT::bf16)) {
6937 if (isPromotedOpNeedingSplit(Op1, Subtarget))
6938 return SplitVectorOp(Op, DAG);
6939 // [b]f16 -> f32
6940 SDLoc DL(Op);
6941 MVT NVT = MVT::getVectorVT(MVT::f32,
6942 Op1.getValueType().getVectorElementCount());
6943 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6944 // f32 -> int
6945 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6946 }
6947 [[fallthrough]];
6948 case ISD::STRICT_FP_TO_SINT:
6949 case ISD::STRICT_FP_TO_UINT:
6950 case ISD::STRICT_SINT_TO_FP:
6951 case ISD::STRICT_UINT_TO_FP: {
6952 // RVV can only do fp<->int conversions to types half or double the size of
6953 // the source. We custom-lower any conversions that do two hops into
6954 // sequences.
6955 MVT VT = Op.getSimpleValueType();
6956 if (VT.isScalarInteger())
6957 return lowerFP_TO_INT(Op, DAG, Subtarget);
6958 bool IsStrict = Op->isStrictFPOpcode();
6959 SDValue Src = Op.getOperand(0 + IsStrict);
6960 MVT SrcVT = Src.getSimpleValueType();
6961 if (SrcVT.isScalarInteger())
6962 return lowerINT_TO_FP(Op, DAG, Subtarget);
6963 if (!VT.isVector())
6964 return Op;
6965 SDLoc DL(Op);
6966 MVT EltVT = VT.getVectorElementType();
6967 MVT SrcEltVT = SrcVT.getVectorElementType();
6968 unsigned EltSize = EltVT.getSizeInBits();
6969 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6970 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6971 "Unexpected vector element types");
6972
6973 bool IsInt2FP = SrcEltVT.isInteger();
6974 // Widening conversions
6975 if (EltSize > (2 * SrcEltSize)) {
6976 if (IsInt2FP) {
6977 // Do a regular integer sign/zero extension then convert to float.
6978 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6979                               VT.getVectorElementCount());
6980 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6981                       Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6982                          ? ISD::ZERO_EXTEND
6983                          : ISD::SIGN_EXTEND;
6984 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6985 if (IsStrict)
6986 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6987 Op.getOperand(0), Ext);
6988 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6989 }
6990 // FP2Int
6991 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6992 // Do one doubling fp_extend then complete the operation by converting
6993 // to int.
6994 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6995 if (IsStrict) {
6996 auto [FExt, Chain] =
6997 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6998 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6999 }
7000 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7001 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7002 }
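// For example, converting <vscale x 4 x i8> to <vscale x 4 x f32> takes two
// hops: the i8 elements are first sign/zero-extended to i16 (half the f32
// element size), and the remaining i16 -> f32 step is a single widening
// conversion that RVV handles directly.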
7003
7004 // Narrowing conversions
7005 if (SrcEltSize > (2 * EltSize)) {
7006 if (IsInt2FP) {
7007 // One narrowing int_to_fp, then an fp_round.
7008 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7009 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7010 if (IsStrict) {
7011 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7012 DAG.getVTList(InterimFVT, MVT::Other),
7013 Op.getOperand(0), Src);
7014 SDValue Chain = Int2FP.getValue(1);
7015 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7016 }
7017 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7018 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7019 }
7020 // FP2Int
7021 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7022 // representable by the integer, the result is poison.
7023 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7024                               VT.getVectorElementCount());
7025 if (IsStrict) {
7026 SDValue FP2Int =
7027 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7028 Op.getOperand(0), Src);
7029 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7030 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7031 }
7032 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7033 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7034 }
7035
7036 // Scalable vectors can exit here. Patterns will handle equally-sized
7037 // conversions halving/doubling ones.
7038 if (!VT.isFixedLengthVector())
7039 return Op;
7040
7041 // For fixed-length vectors we lower to a custom "VL" node.
7042 unsigned RVVOpc = 0;
7043 switch (Op.getOpcode()) {
7044 default:
7045 llvm_unreachable("Impossible opcode");
7046 case ISD::FP_TO_SINT:
7047 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7048 break;
7049 case ISD::FP_TO_UINT:
7050 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7051 break;
7052 case ISD::SINT_TO_FP:
7053 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7054 break;
7055 case ISD::UINT_TO_FP:
7056 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7057 break;
7058 case ISD::STRICT_FP_TO_SINT:
7059 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7060 break;
7061 case ISD::STRICT_FP_TO_UINT:
7062 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7063 break;
7064 case ISD::STRICT_SINT_TO_FP:
7065 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7066 break;
7067 case ISD::STRICT_UINT_TO_FP:
7068 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7069 break;
7070 }
7071
7072 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7073 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7074 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7075 "Expected same element count");
7076
7077 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7078
7079 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7080 if (IsStrict) {
7081 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7082 Op.getOperand(0), Src, Mask, VL);
7083 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7084 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7085 }
7086 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7087 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7088 }
7089 case ISD::FP_TO_SINT_SAT:
7090 case ISD::FP_TO_UINT_SAT:
7091 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7092 case ISD::FP_TO_BF16: {
7093 // Custom lower to ensure the libcall return is passed in an FPR on hard
7094 // float ABIs.
7095 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7096 SDLoc DL(Op);
7097 MakeLibCallOptions CallOptions;
7098 RTLIB::Libcall LC =
7099 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7100 SDValue Res =
7101 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7102 if (Subtarget.is64Bit())
7103 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7104 return DAG.getBitcast(MVT::i32, Res);
7105 }
7106 case ISD::BF16_TO_FP: {
7107 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7108 MVT VT = Op.getSimpleValueType();
7109 SDLoc DL(Op);
7110 Op = DAG.getNode(
7111 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7112 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7113 SDValue Res = Subtarget.is64Bit()
7114 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7115 : DAG.getBitcast(MVT::f32, Op);
7116 // fp_extend if the target VT is bigger than f32.
7117 if (VT != MVT::f32)
7118 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7119 return Res;
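// For example, the bf16 value 1.0 has bit pattern 0x3F80; shifted left by 16
// it becomes 0x3F800000, which is exactly the f32 encoding of 1.0, since
// bf16 is the upper half of an f32.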
7120 }
7121 case ISD::STRICT_FP_TO_FP16:
7122 case ISD::FP_TO_FP16: {
7123 // Custom lower to ensure the libcall return is passed in an FPR on hard
7124 // float ABIs.
7125 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7126 SDLoc DL(Op);
7127 MakeLibCallOptions CallOptions;
7128 bool IsStrict = Op->isStrictFPOpcode();
7129 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7130 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7131 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7132 SDValue Res;
7133 std::tie(Res, Chain) =
7134 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7135 if (Subtarget.is64Bit())
7136 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7137 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7138 if (IsStrict)
7139 return DAG.getMergeValues({Result, Chain}, DL);
7140 return Result;
7141 }
7142 case ISD::STRICT_FP16_TO_FP:
7143 case ISD::FP16_TO_FP: {
7144 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7145 // float ABIs.
7146 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7147 SDLoc DL(Op);
7148 MakeLibCallOptions CallOptions;
7149 bool IsStrict = Op->isStrictFPOpcode();
7150 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7151 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7152 SDValue Arg = Subtarget.is64Bit()
7153 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7154 : DAG.getBitcast(MVT::f32, Op0);
7155 SDValue Res;
7156 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7157 CallOptions, DL, Chain);
7158 if (IsStrict)
7159 return DAG.getMergeValues({Res, Chain}, DL);
7160 return Res;
7161 }
7162 case ISD::FTRUNC:
7163 case ISD::FCEIL:
7164 case ISD::FFLOOR:
7165 case ISD::FNEARBYINT:
7166 case ISD::FRINT:
7167 case ISD::FROUND:
7168 case ISD::FROUNDEVEN:
7169 if (isPromotedOpNeedingSplit(Op, Subtarget))
7170 return SplitVectorOp(Op, DAG);
7171 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7172 case ISD::LRINT:
7173 case ISD::LLRINT:
7174 if (Op.getValueType().isVector())
7175 return lowerVectorXRINT(Op, DAG, Subtarget);
7176 [[fallthrough]];
7177 case ISD::LROUND:
7178 case ISD::LLROUND: {
7179 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7180 "Unexpected custom legalisation");
7181 SDLoc DL(Op);
7182 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7183 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7184 }
7185 case ISD::STRICT_LRINT:
7186 case ISD::STRICT_LLRINT:
7187 case ISD::STRICT_LROUND:
7188 case ISD::STRICT_LLROUND: {
7189 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7190 "Unexpected custom legalisation");
7191 SDLoc DL(Op);
7192 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7193 {Op.getOperand(0), Op.getOperand(1)});
7194 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7195 {Ext.getValue(1), Ext.getValue(0)});
7196 }
7197 case ISD::VECREDUCE_ADD:
7198 case ISD::VECREDUCE_UMAX:
7199 case ISD::VECREDUCE_SMAX:
7200 case ISD::VECREDUCE_UMIN:
7201 case ISD::VECREDUCE_SMIN:
7202 return lowerVECREDUCE(Op, DAG);
7203 case ISD::VECREDUCE_AND:
7204 case ISD::VECREDUCE_OR:
7205 case ISD::VECREDUCE_XOR:
7206 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7207 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7208 return lowerVECREDUCE(Op, DAG);
7209 case ISD::VECREDUCE_FADD:
7210 case ISD::VECREDUCE_SEQ_FADD:
7211 case ISD::VECREDUCE_FMIN:
7212 case ISD::VECREDUCE_FMAX:
7213 case ISD::VECREDUCE_FMAXIMUM:
7214 case ISD::VECREDUCE_FMINIMUM:
7215 return lowerFPVECREDUCE(Op, DAG);
7216 case ISD::VP_REDUCE_ADD:
7217 case ISD::VP_REDUCE_UMAX:
7218 case ISD::VP_REDUCE_SMAX:
7219 case ISD::VP_REDUCE_UMIN:
7220 case ISD::VP_REDUCE_SMIN:
7221 case ISD::VP_REDUCE_FADD:
7222 case ISD::VP_REDUCE_SEQ_FADD:
7223 case ISD::VP_REDUCE_FMIN:
7224 case ISD::VP_REDUCE_FMAX:
7225 case ISD::VP_REDUCE_FMINIMUM:
7226 case ISD::VP_REDUCE_FMAXIMUM:
7227 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7228 return SplitVectorReductionOp(Op, DAG);
7229 return lowerVPREDUCE(Op, DAG);
7230 case ISD::VP_REDUCE_AND:
7231 case ISD::VP_REDUCE_OR:
7232 case ISD::VP_REDUCE_XOR:
7233 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7234 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7235 return lowerVPREDUCE(Op, DAG);
7236 case ISD::VP_CTTZ_ELTS:
7237 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7238 return lowerVPCttzElements(Op, DAG);
7239 case ISD::UNDEF: {
7240 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7241 return convertFromScalableVector(Op.getSimpleValueType(),
7242 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7243 }
7244 case ISD::INSERT_SUBVECTOR:
7245 return lowerINSERT_SUBVECTOR(Op, DAG);
7246 case ISD::EXTRACT_SUBVECTOR:
7247 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7248 case ISD::VECTOR_DEINTERLEAVE:
7249 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7250 case ISD::VECTOR_INTERLEAVE:
7251 return lowerVECTOR_INTERLEAVE(Op, DAG);
7252 case ISD::STEP_VECTOR:
7253 return lowerSTEP_VECTOR(Op, DAG);
7254 case ISD::VECTOR_REVERSE:
7255 return lowerVECTOR_REVERSE(Op, DAG);
7256 case ISD::VECTOR_SPLICE:
7257 return lowerVECTOR_SPLICE(Op, DAG);
7258 case ISD::BUILD_VECTOR:
7259 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7260 case ISD::SPLAT_VECTOR: {
7261 MVT VT = Op.getSimpleValueType();
7262 MVT EltVT = VT.getVectorElementType();
7263 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7264 EltVT == MVT::bf16) {
7265 SDLoc DL(Op);
7266 SDValue Elt;
7267 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7268 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7269 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7270 Op.getOperand(0));
7271 else
7272 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7273 MVT IVT = VT.changeVectorElementType(MVT::i16);
7274 return DAG.getNode(ISD::BITCAST, DL, VT,
7275 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7276 }
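// In other words, the splat is performed on the 16-bit integer image of the
// scalar (obtained via FMV_X_ANYEXTH or a plain i16 bitcast) and the result
// is bitcast back to the FP vector type.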
7277
7278 if (EltVT == MVT::i1)
7279 return lowerVectorMaskSplat(Op, DAG);
7280 return SDValue();
7281 }
7282 case ISD::VECTOR_SHUFFLE:
7283 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7284 case ISD::CONCAT_VECTORS: {
7285 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7286 // better than going through the stack, as the default expansion does.
7287 SDLoc DL(Op);
7288 MVT VT = Op.getSimpleValueType();
7289 MVT ContainerVT = VT;
7290 if (VT.isFixedLengthVector())
7291 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7292
7293 // Recursively split concat_vectors with more than 2 operands:
7294 //
7295 // concat_vector op1, op2, op3, op4
7296 // ->
7297 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7298 //
7299 // This reduces the length of the chain of vslideups and allows us to
7300 // perform the vslideups at a smaller LMUL, limited to MF2.
7301 if (Op.getNumOperands() > 2 &&
7302 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7303 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7304 assert(isPowerOf2_32(Op.getNumOperands()));
7305 size_t HalfNumOps = Op.getNumOperands() / 2;
7306 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7307 Op->ops().take_front(HalfNumOps));
7308 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7309 Op->ops().drop_front(HalfNumOps));
7310 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7311 }
7312
7313 unsigned NumOpElts =
7314 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7315 SDValue Vec = DAG.getUNDEF(VT);
7316 for (const auto &OpIdx : enumerate(Op->ops())) {
7317 SDValue SubVec = OpIdx.value();
7318 // Don't insert undef subvectors.
7319 if (SubVec.isUndef())
7320 continue;
7321 Vec =
7322 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
7323 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
7324 }
7325 return Vec;
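// For example, (concat_vectors v4i32:a, v4i32:b) becomes
//   (insert_subvector (insert_subvector undef:v8i32, a, 0), b, 4)
// and a four-operand concat is first split into two two-operand concats as
// described above.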
7326 }
7327 case ISD::LOAD: {
7328 auto *Load = cast<LoadSDNode>(Op);
7329 EVT VecTy = Load->getMemoryVT();
7330 // Handle normal vector tuple load.
7331 if (VecTy.isRISCVVectorTuple()) {
7332 SDLoc DL(Op);
7333 MVT XLenVT = Subtarget.getXLenVT();
7334 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7335 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7336 unsigned NumElts = Sz / (NF * 8);
7337 int Log2LMUL = Log2_64(NumElts) - 3;
7338
7339 auto Flag = SDNodeFlags();
7340 Flag.setNoUnsignedWrap(true);
7341 SDValue Ret = DAG.getUNDEF(VecTy);
7342 SDValue BasePtr = Load->getBasePtr();
7343 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7344 VROffset =
7345 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7346 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7347 SmallVector<SDValue, 8> OutChains;
7348
7349 // Load NF vector registers and combine them to a vector tuple.
7350 for (unsigned i = 0; i < NF; ++i) {
7351 SDValue LoadVal = DAG.getLoad(
7352 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
7353 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
7354 OutChains.push_back(LoadVal.getValue(1));
7355 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7356 DAG.getVectorIdxConstant(i, DL));
7357 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7358 }
7359 return DAG.getMergeValues(
7360 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
7361 }
7362
7363 if (auto V = expandUnalignedRVVLoad(Op, DAG))
7364 return V;
7365 if (Op.getValueType().isFixedLengthVector())
7366 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7367 return Op;
7368 }
7369 case ISD::STORE: {
7370 auto *Store = cast<StoreSDNode>(Op);
7371 SDValue StoredVal = Store->getValue();
7372 EVT VecTy = StoredVal.getValueType();
7373 // Handle normal vector tuple store.
7374 if (VecTy.isRISCVVectorTuple()) {
7375 SDLoc DL(Op);
7376 MVT XLenVT = Subtarget.getXLenVT();
7377 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7378 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7379 unsigned NumElts = Sz / (NF * 8);
7380 int Log2LMUL = Log2_64(NumElts) - 3;
7381
7382 auto Flag = SDNodeFlags();
7383 Flag.setNoUnsignedWrap(true);
7384 SDValue Ret;
7385 SDValue Chain = Store->getChain();
7386 SDValue BasePtr = Store->getBasePtr();
7387 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7388 VROffset =
7389 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7390 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7391
7392 // Extract subregisters in a vector tuple and store them individually.
7393 for (unsigned i = 0; i < NF; ++i) {
7394 auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
7395 MVT::getScalableVectorVT(MVT::i8, NumElts),
7396 StoredVal, DAG.getVectorIdxConstant(i, DL));
7397 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
7398 MachinePointerInfo(Store->getAddressSpace()),
7399 Store->getOriginalAlign(),
7400 Store->getMemOperand()->getFlags());
7401 Chain = Ret.getValue(0);
7402 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7403 }
7404 return Ret;
7405 }
7406
7407 if (auto V = expandUnalignedRVVStore(Op, DAG))
7408 return V;
7409 if (Op.getOperand(1).getValueType().isFixedLengthVector())
7410 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7411 return Op;
7412 }
7413 case ISD::MLOAD:
7414 case ISD::VP_LOAD:
7415 return lowerMaskedLoad(Op, DAG);
7416 case ISD::MSTORE:
7417 case ISD::VP_STORE:
7418 return lowerMaskedStore(Op, DAG);
7419 case ISD::VECTOR_COMPRESS:
7420 return lowerVectorCompress(Op, DAG);
7421 case ISD::SELECT_CC: {
7422 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
7423 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7424 // into separate SETCC+SELECT just like LegalizeDAG.
7425 SDValue Tmp1 = Op.getOperand(0);
7426 SDValue Tmp2 = Op.getOperand(1);
7427 SDValue True = Op.getOperand(2);
7428 SDValue False = Op.getOperand(3);
7429 EVT VT = Op.getValueType();
7430 SDValue CC = Op.getOperand(4);
7431 EVT CmpVT = Tmp1.getValueType();
7432 EVT CCVT =
7433 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7434 SDLoc DL(Op);
7435 SDValue Cond =
7436 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
7437 return DAG.getSelect(DL, VT, Cond, True, False);
7438 }
7439 case ISD::SETCC: {
7440 MVT OpVT = Op.getOperand(0).getSimpleValueType();
7441 if (OpVT.isScalarInteger()) {
7442 MVT VT = Op.getSimpleValueType();
7443 SDValue LHS = Op.getOperand(0);
7444 SDValue RHS = Op.getOperand(1);
7445 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7446 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
7447 "Unexpected CondCode");
7448
7449 SDLoc DL(Op);
7450
7451 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7452 // convert this to the equivalent of (set(u)ge X, C+1) by using
7453 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7454 // in a register.
7455 if (isa<ConstantSDNode>(RHS)) {
7456 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7457 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7458 // If this is an unsigned compare and the constant is -1, incrementing
7459 // the constant would change behavior. The result should be false.
7460 if (CCVal == ISD::SETUGT && Imm == -1)
7461 return DAG.getConstant(0, DL, VT);
7462 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7463 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7464 SDValue SetCC = DAG.getSetCC(
7465 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
7466 return DAG.getLogicalNOT(DL, SetCC, VT);
7467 }
7468 }
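// For example, (setugt X, 7) is rewritten as the logical-not of
// (setult X, 8), which selects to roughly
//   sltiu a0, a0, 8
//   xori  a0, a0, 1
// instead of materializing the constant in a register for an sltu.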
7469
7470 // Not a constant we could handle, swap the operands and condition code to
7471 // SETLT/SETULT.
7472 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7473 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
7474 }
7475
7476 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7477 return SplitVectorOp(Op, DAG);
7478
7479 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7480 }
7481 case ISD::ADD:
7482 case ISD::SUB:
7483 case ISD::MUL:
7484 case ISD::MULHS:
7485 case ISD::MULHU:
7486 case ISD::AND:
7487 case ISD::OR:
7488 case ISD::XOR:
7489 case ISD::SDIV:
7490 case ISD::SREM:
7491 case ISD::UDIV:
7492 case ISD::UREM:
7493 case ISD::BSWAP:
7494 case ISD::CTPOP:
7495 return lowerToScalableOp(Op, DAG);
7496 case ISD::SHL:
7497 case ISD::SRA:
7498 case ISD::SRL:
7499 if (Op.getSimpleValueType().isFixedLengthVector())
7500 return lowerToScalableOp(Op, DAG);
7501 // This can be called for an i32 shift amount that needs to be promoted.
7502 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7503 "Unexpected custom legalisation");
7504 return SDValue();
7505 case ISD::FABS:
7506 case ISD::FNEG:
7507 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7508 return lowerFABSorFNEG(Op, DAG, Subtarget);
7509 [[fallthrough]];
7510 case ISD::FADD:
7511 case ISD::FSUB:
7512 case ISD::FMUL:
7513 case ISD::FDIV:
7514 case ISD::FSQRT:
7515 case ISD::FMA:
7516 case ISD::FMINNUM:
7517 case ISD::FMAXNUM:
7518 if (isPromotedOpNeedingSplit(Op, Subtarget))
7519 return SplitVectorOp(Op, DAG);
7520 [[fallthrough]];
7521 case ISD::AVGFLOORS:
7522 case ISD::AVGFLOORU:
7523 case ISD::AVGCEILS:
7524 case ISD::AVGCEILU:
7525 case ISD::SMIN:
7526 case ISD::SMAX:
7527 case ISD::UMIN:
7528 case ISD::UMAX:
7529 case ISD::UADDSAT:
7530 case ISD::USUBSAT:
7531 case ISD::SADDSAT:
7532 case ISD::SSUBSAT:
7533 return lowerToScalableOp(Op, DAG);
7534 case ISD::ABDS:
7535 case ISD::ABDU: {
7536 SDLoc dl(Op);
7537 EVT VT = Op->getValueType(0);
7538 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7539 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7540 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7541
7542 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7543 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
7544 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7545 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7546 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7547 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7548 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
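// For example, abds(3, -5) = smax(3, -5) - smin(3, -5) = 3 - (-5) = 8, the
// expected absolute difference.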
7549 }
7550 case ISD::ABS:
7551 case ISD::VP_ABS:
7552 return lowerABS(Op, DAG);
7553 case ISD::CTLZ:
7554 case ISD::CTLZ_ZERO_UNDEF:
7555 case ISD::CTTZ:
7556 case ISD::CTTZ_ZERO_UNDEF:
7557 if (Subtarget.hasStdExtZvbb())
7558 return lowerToScalableOp(Op, DAG);
7559 assert(Op.getOpcode() != ISD::CTTZ);
7560 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7561 case ISD::VSELECT:
7562 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7563 case ISD::FCOPYSIGN:
7564 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7565 return lowerFCOPYSIGN(Op, DAG, Subtarget);
7566 if (isPromotedOpNeedingSplit(Op, Subtarget))
7567 return SplitVectorOp(Op, DAG);
7568 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7569 case ISD::STRICT_FADD:
7570 case ISD::STRICT_FSUB:
7571 case ISD::STRICT_FMUL:
7572 case ISD::STRICT_FDIV:
7573 case ISD::STRICT_FSQRT:
7574 case ISD::STRICT_FMA:
7575 if (isPromotedOpNeedingSplit(Op, Subtarget))
7576 return SplitStrictFPVectorOp(Op, DAG);
7577 return lowerToScalableOp(Op, DAG);
7578 case ISD::STRICT_FSETCC:
7579 case ISD::STRICT_FSETCCS:
7580 return lowerVectorStrictFSetcc(Op, DAG);
7581 case ISD::STRICT_FCEIL:
7582 case ISD::STRICT_FRINT:
7583 case ISD::STRICT_FFLOOR:
7584 case ISD::STRICT_FTRUNC:
7585 case ISD::STRICT_FNEARBYINT:
7586 case ISD::STRICT_FROUND:
7587 case ISD::STRICT_FROUNDEVEN:
7588 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7589 case ISD::MGATHER:
7590 case ISD::VP_GATHER:
7591 return lowerMaskedGather(Op, DAG);
7592 case ISD::MSCATTER:
7593 case ISD::VP_SCATTER:
7594 return lowerMaskedScatter(Op, DAG);
7595 case ISD::GET_ROUNDING:
7596 return lowerGET_ROUNDING(Op, DAG);
7597 case ISD::SET_ROUNDING:
7598 return lowerSET_ROUNDING(Op, DAG);
7599 case ISD::EH_DWARF_CFA:
7600 return lowerEH_DWARF_CFA(Op, DAG);
7601 case ISD::VP_MERGE:
7602 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7603 return lowerVPMergeMask(Op, DAG);
7604 [[fallthrough]];
7605 case ISD::VP_SELECT:
7606 case ISD::VP_ADD:
7607 case ISD::VP_SUB:
7608 case ISD::VP_MUL:
7609 case ISD::VP_SDIV:
7610 case ISD::VP_UDIV:
7611 case ISD::VP_SREM:
7612 case ISD::VP_UREM:
7613 case ISD::VP_UADDSAT:
7614 case ISD::VP_USUBSAT:
7615 case ISD::VP_SADDSAT:
7616 case ISD::VP_SSUBSAT:
7617 case ISD::VP_LRINT:
7618 case ISD::VP_LLRINT:
7619 return lowerVPOp(Op, DAG);
7620 case ISD::VP_AND:
7621 case ISD::VP_OR:
7622 case ISD::VP_XOR:
7623 return lowerLogicVPOp(Op, DAG);
7624 case ISD::VP_FADD:
7625 case ISD::VP_FSUB:
7626 case ISD::VP_FMUL:
7627 case ISD::VP_FDIV:
7628 case ISD::VP_FNEG:
7629 case ISD::VP_FABS:
7630 case ISD::VP_SQRT:
7631 case ISD::VP_FMA:
7632 case ISD::VP_FMINNUM:
7633 case ISD::VP_FMAXNUM:
7634 case ISD::VP_FCOPYSIGN:
7635 if (isPromotedOpNeedingSplit(Op, Subtarget))
7636 return SplitVPOp(Op, DAG);
7637 [[fallthrough]];
7638 case ISD::VP_SRA:
7639 case ISD::VP_SRL:
7640 case ISD::VP_SHL:
7641 return lowerVPOp(Op, DAG);
7642 case ISD::VP_IS_FPCLASS:
7643 return LowerIS_FPCLASS(Op, DAG);
7644 case ISD::VP_SIGN_EXTEND:
7645 case ISD::VP_ZERO_EXTEND:
7646 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7647 return lowerVPExtMaskOp(Op, DAG);
7648 return lowerVPOp(Op, DAG);
7649 case ISD::VP_TRUNCATE:
7650 return lowerVectorTruncLike(Op, DAG);
7651 case ISD::VP_FP_EXTEND:
7652 case ISD::VP_FP_ROUND:
7653 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7654 case ISD::VP_SINT_TO_FP:
7655 case ISD::VP_UINT_TO_FP:
7656 if (Op.getValueType().isVector() &&
7657 ((Op.getValueType().getScalarType() == MVT::f16 &&
7658 (Subtarget.hasVInstructionsF16Minimal() &&
7659 !Subtarget.hasVInstructionsF16())) ||
7660 Op.getValueType().getScalarType() == MVT::bf16)) {
7661 if (isPromotedOpNeedingSplit(Op, Subtarget))
7662 return SplitVectorOp(Op, DAG);
7663 // int -> f32
7664 SDLoc DL(Op);
7665 MVT NVT =
7666 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7667 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7668 // f32 -> [b]f16
7669 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7670 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7671 }
7672 [[fallthrough]];
7673 case ISD::VP_FP_TO_SINT:
7674 case ISD::VP_FP_TO_UINT:
7675 if (SDValue Op1 = Op.getOperand(0);
7676 Op1.getValueType().isVector() &&
7677 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7678 (Subtarget.hasVInstructionsF16Minimal() &&
7679 !Subtarget.hasVInstructionsF16())) ||
7680 Op1.getValueType().getScalarType() == MVT::bf16)) {
7681 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7682 return SplitVectorOp(Op, DAG);
7683 // [b]f16 -> f32
7684 SDLoc DL(Op);
7685 MVT NVT = MVT::getVectorVT(MVT::f32,
7686 Op1.getValueType().getVectorElementCount());
7687 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7688 // f32 -> int
7689 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7690 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7691 }
7692 return lowerVPFPIntConvOp(Op, DAG);
7693 case ISD::VP_SETCC:
7694 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7695 return SplitVPOp(Op, DAG);
7696 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7697 return lowerVPSetCCMaskOp(Op, DAG);
7698 [[fallthrough]];
7699 case ISD::VP_SMIN:
7700 case ISD::VP_SMAX:
7701 case ISD::VP_UMIN:
7702 case ISD::VP_UMAX:
7703 case ISD::VP_BITREVERSE:
7704 case ISD::VP_BSWAP:
7705 return lowerVPOp(Op, DAG);
7706 case ISD::VP_CTLZ:
7707 case ISD::VP_CTLZ_ZERO_UNDEF:
7708 if (Subtarget.hasStdExtZvbb())
7709 return lowerVPOp(Op, DAG);
7710 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7711 case ISD::VP_CTTZ:
7712 case ISD::VP_CTTZ_ZERO_UNDEF:
7713 if (Subtarget.hasStdExtZvbb())
7714 return lowerVPOp(Op, DAG);
7715 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7716 case ISD::VP_CTPOP:
7717 return lowerVPOp(Op, DAG);
7718 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7719 return lowerVPStridedLoad(Op, DAG);
7720 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7721 return lowerVPStridedStore(Op, DAG);
7722 case ISD::VP_FCEIL:
7723 case ISD::VP_FFLOOR:
7724 case ISD::VP_FRINT:
7725 case ISD::VP_FNEARBYINT:
7726 case ISD::VP_FROUND:
7727 case ISD::VP_FROUNDEVEN:
7728 case ISD::VP_FROUNDTOZERO:
7729 if (isPromotedOpNeedingSplit(Op, Subtarget))
7730 return SplitVPOp(Op, DAG);
7731 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7732 case ISD::VP_FMAXIMUM:
7733 case ISD::VP_FMINIMUM:
7734 if (isPromotedOpNeedingSplit(Op, Subtarget))
7735 return SplitVPOp(Op, DAG);
7736 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7737 case ISD::EXPERIMENTAL_VP_SPLICE:
7738 return lowerVPSpliceExperimental(Op, DAG);
7739 case ISD::EXPERIMENTAL_VP_REVERSE:
7740 return lowerVPReverseExperimental(Op, DAG);
7741 case ISD::EXPERIMENTAL_VP_SPLAT:
7742 return lowerVPSplatExperimental(Op, DAG);
7743 case ISD::CLEAR_CACHE: {
7744 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7745 "llvm.clear_cache only needs custom lower on Linux targets");
7746 SDLoc DL(Op);
7747 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7748 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7749 Op.getOperand(2), Flags, DL);
7750 }
7751 case ISD::DYNAMIC_STACKALLOC:
7752 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7753 case ISD::INIT_TRAMPOLINE:
7754 return lowerINIT_TRAMPOLINE(Op, DAG);
7755 case ISD::ADJUST_TRAMPOLINE:
7756 return lowerADJUST_TRAMPOLINE(Op, DAG);
7757 }
7758}
7759
7760SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7761 SDValue Start, SDValue End,
7762 SDValue Flags, SDLoc DL) const {
7763 MakeLibCallOptions CallOptions;
7764 std::pair<SDValue, SDValue> CallResult =
7765 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7766 {Start, End, Flags}, CallOptions, DL, InChain);
7767
7768 // This function returns void so only the out chain matters.
7769 return CallResult.second;
7770}
7771
7772SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
7773 SelectionDAG &DAG) const {
7774 if (!Subtarget.is64Bit())
7775 llvm::report_fatal_error("Trampolines only implemented for RV64");
7776
7777 // Create an MCCodeEmitter to encode instructions.
7778 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
7779 assert(TLO);
7780 MCContext &MCCtx = TLO->getContext();
7781
7782 std::unique_ptr<MCCodeEmitter> CodeEmitter(
7783 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
7784
7785 SDValue Root = Op.getOperand(0);
7786 SDValue Trmp = Op.getOperand(1); // trampoline
7787 SDLoc dl(Op);
7788
7789 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7790
7791 // We store in the trampoline buffer the following instructions and data.
7792 // Offset:
7793 // 0: auipc t2, 0
7794 // 4: ld t0, 24(t2)
7795 // 8: ld t2, 16(t2)
7796 // 12: jalr t0
7797 // 16: <StaticChainOffset>
7798 // 24: <FunctionAddressOffset>
7799 // 32:
7800
7801 constexpr unsigned StaticChainOffset = 16;
7802 constexpr unsigned FunctionAddressOffset = 24;
7803
7804 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
7805 assert(STI);
7806 auto GetEncoding = [&](const MCInst &MC) {
7807 SmallVector<char, 32> CB;
7808 SmallVector<MCFixup> Fixups;
7809 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
7810 uint32_t Encoding = support::endian::read32le(CB.data());
7811 return Encoding;
7812 };
7813
7814 SDValue OutChains[6];
7815
7816 uint32_t Encodings[] = {
7817 // auipc t2, 0
7818 // Loads the current PC into t2.
7819 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
7820 // ld t0, 24(t2)
7821 // Loads the function address into t0. Note that we are using offsets
7822 // pc-relative to the first instruction of the trampoline.
7823 GetEncoding(
7824 MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
7825 FunctionAddressOffset)),
7826 // ld t2, 16(t2)
7827 // Load the value of the static chain.
7828 GetEncoding(
7829 MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
7830 StaticChainOffset)),
7831 // jalr t0
7832 // Jump to the function.
7833 GetEncoding(MCInstBuilder(RISCV::JALR)
7834 .addReg(RISCV::X0)
7835 .addReg(RISCV::X5)
7836 .addImm(0))};
7837
7838 // Store encoded instructions.
7839 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
7840 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7841 DAG.getConstant(Idx * 4, dl, MVT::i64))
7842 : Trmp;
7843 OutChains[Idx] = DAG.getTruncStore(
7844 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
7845 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32);
7846 }
7847
7848 // Now store the variable part of the trampoline.
7849 SDValue FunctionAddress = Op.getOperand(2);
7850 SDValue StaticChain = Op.getOperand(3);
7851
7852 // Store the given static chain and function pointer in the trampoline buffer.
7853 struct OffsetValuePair {
7854 const unsigned Offset;
7855 const SDValue Value;
7856 SDValue Addr = SDValue(); // Used to cache the address.
7857 } OffsetValues[] = {
7858 {StaticChainOffset, StaticChain},
7859 {FunctionAddressOffset, FunctionAddress},
7860 };
7861 for (auto [Idx, OffsetValue] : llvm::enumerate(OffsetValues)) {
7862 SDValue Addr =
7863 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7864 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
7865 OffsetValue.Addr = Addr;
7866 OutChains[Idx + 4] =
7867 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
7868 MachinePointerInfo(TrmpAddr, OffsetValue.Offset));
7869 }
7870
7871 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
7872
7873 // The end of the trampoline's instructions coincides with the static chain
7874 // address computed earlier.
7875 SDValue EndOfTrmp = OffsetValues[0].Addr;
7876
7877 // Call clear cache on the trampoline instructions.
7878 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
7879 Trmp, EndOfTrmp);
7880
7881 return Chain;
7882}
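// For reference, this lowering is reached from IR along the lines of
// (illustrative names):
//   %tramp = alloca [32 x i8], align 8
//   call void @llvm.init.trampoline(ptr %tramp, ptr @nest_fn, ptr %chain)
//   %fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
// Calling %fp then reaches @nest_fn with %chain loaded into t2 as the static
// chain, as encoded above.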
7883
7884SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
7885 SelectionDAG &DAG) const {
7886 if (!Subtarget.is64Bit())
7887 llvm::report_fatal_error("Trampolines only implemented for RV64");
7888
7889 return Op.getOperand(0);
7890}
7891
7892 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7893                              SelectionDAG &DAG, unsigned Flags) {
7894 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7895}
7896
7897 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7898                              SelectionDAG &DAG, unsigned Flags) {
7899 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7900 Flags);
7901}
7902
7903 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7904                              SelectionDAG &DAG, unsigned Flags) {
7905 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7906 N->getOffset(), Flags);
7907}
7908
7909 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7910                              SelectionDAG &DAG, unsigned Flags) {
7911 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7912}
7913
7914 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, SDLoc DL,
7915                                      EVT Ty, SelectionDAG &DAG) {
7916 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
7917 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7918 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7919 return DAG.getLoad(
7920 Ty, DL, DAG.getEntryNode(), LC,
7921 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7922 }
7923
7924 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, SDLoc DL,
7925                                       EVT Ty, SelectionDAG &DAG) {
7926 RISCVConstantPoolValue *CPV =
7927 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
7928 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7929 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7930 return DAG.getLoad(
7931 Ty, DL, DAG.getEntryNode(), LC,
7932 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7933 }
7934
7935template <class NodeTy>
7936SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7937 bool IsLocal, bool IsExternWeak) const {
7938 SDLoc DL(N);
7939 EVT Ty = getPointerTy(DAG.getDataLayout());
7940
7941 // When HWASAN is used and tagging of global variables is enabled
7942 // they should be accessed via the GOT, since the tagged address of a global
7943 // is incompatible with existing code models. This also applies to non-pic
7944 // mode.
7945 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7946 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7947 if (IsLocal && !Subtarget.allowTaggedGlobals())
7948 // Use PC-relative addressing to access the symbol. This generates the
7949 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7950 // %pcrel_lo(auipc)).
7951 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7952
7953 // Use PC-relative addressing to access the GOT for this symbol, then load
7954 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7955 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7956 SDValue Load =
7957 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7958 MachineFunction &MF = DAG.getMachineFunction();
7959 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7960 MachinePointerInfo::getGOT(MF),
7961 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7962 MachineMemOperand::MOInvariant,
7963 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7964 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7965 return Load;
7966 }
7967
7968 switch (getTargetMachine().getCodeModel()) {
7969 default:
7970 report_fatal_error("Unsupported code model for lowering");
7971 case CodeModel::Small: {
7972 // Generate a sequence for accessing addresses within the first 2 GiB of
7973 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
7974 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7975 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7976 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7977 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7978 }
7979 case CodeModel::Medium: {
7980 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7981 if (IsExternWeak) {
7982 // An extern weak symbol may be undefined, i.e. have value 0, which may
7983 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7984 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7985 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7986 SDValue Load =
7987 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7988 MachineFunction &MF = DAG.getMachineFunction();
7989 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7990 MachinePointerInfo::getGOT(MF),
7991 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7992 MachineMemOperand::MOInvariant,
7993 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7994 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7995 return Load;
7996 }
7997
7998 // Generate a sequence for accessing addresses within any 2GiB range within
7999 // the address space. This generates the pattern (PseudoLLA sym), which
8000 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
8001 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8002 }
8003 case CodeModel::Large: {
8004 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8005 return getLargeGlobalAddress(G, DL, Ty, DAG);
8006
8007 // Use PC-relative addressing for any other node type.
8008 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8009 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8010 }
8011 }
8012}
8013
8014SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8015 SelectionDAG &DAG) const {
8016 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8017 assert(N->getOffset() == 0 && "unexpected offset in global node");
8018 const GlobalValue *GV = N->getGlobal();
8019 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8020}
8021
8022SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8023 SelectionDAG &DAG) const {
8024 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8025
8026 return getAddr(N, DAG);
8027}
8028
8029SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8030 SelectionDAG &DAG) const {
8031 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8032
8033 return getAddr(N, DAG);
8034}
8035
8036SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8037 SelectionDAG &DAG) const {
8038 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8039
8040 return getAddr(N, DAG);
8041}
8042
8043SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8044 SelectionDAG &DAG,
8045 bool UseGOT) const {
8046 SDLoc DL(N);
8047 EVT Ty = getPointerTy(DAG.getDataLayout());
8048 const GlobalValue *GV = N->getGlobal();
8049 MVT XLenVT = Subtarget.getXLenVT();
8050
8051 if (UseGOT) {
8052 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8053 // load the address from the GOT and add the thread pointer. This generates
8054 // the pattern (PseudoLA_TLS_IE sym), which expands to
8055 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8056 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8057 SDValue Load =
8058 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
8059 MachineFunction &MF = DAG.getMachineFunction();
8060 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8061 MachinePointerInfo::getGOT(MF),
8062 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8063 MachineMemOperand::MOInvariant,
8064 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8065 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8066
8067 // Add the thread pointer.
8068 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8069 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
8070 }
8071
8072 // Generate a sequence for accessing the address relative to the thread
8073 // pointer, with the appropriate adjustment for the thread pointer offset.
8074 // This generates the pattern
8075 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
8076 SDValue AddrHi =
8077 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
8078 SDValue AddrAdd =
8079 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
8080 SDValue AddrLo =
8081 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
8082
8083 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8084 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8085 SDValue MNAdd =
8086 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
8087 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
8088}
8089
8090SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
8091 SelectionDAG &DAG) const {
8092 SDLoc DL(N);
8093 EVT Ty = getPointerTy(DAG.getDataLayout());
8094 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
8095 const GlobalValue *GV = N->getGlobal();
8096
8097 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8098 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
8099 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
8100 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8101 SDValue Load =
8102 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
8103
8104 // Prepare argument list to generate call.
8105 ArgListTy Args;
8106 ArgListEntry Entry;
8107 Entry.Node = Load;
8108 Entry.Ty = CallTy;
8109 Args.push_back(Entry);
8110
8111 // Setup call to __tls_get_addr.
8112 TargetLowering::CallLoweringInfo CLI(DAG);
8113 CLI.setDebugLoc(DL)
8114 .setChain(DAG.getEntryNode())
8115 .setLibCallee(CallingConv::C, CallTy,
8116 DAG.getExternalSymbol("__tls_get_addr", Ty),
8117 std::move(Args));
8118
8119 return LowerCallTo(CLI).first;
8120}
8121
8122SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8123 SelectionDAG &DAG) const {
8124 SDLoc DL(N);
8125 EVT Ty = getPointerTy(DAG.getDataLayout());
8126 const GlobalValue *GV = N->getGlobal();
8127
8128 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8129 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8130 //
8131 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8132 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8133 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8134 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8135 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8136 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
8137}
8138
8139SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
8140 SelectionDAG &DAG) const {
8141 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8142 assert(N->getOffset() == 0 && "unexpected offset in global node");
8143
8144 if (DAG.getTarget().useEmulatedTLS())
8145 return LowerToTLSEmulatedModel(N, DAG);
8146
8147 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
8148
8149 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
8150 CallingConv::GHC)
8151 report_fatal_error("In GHC calling convention TLS is not supported");
8152
8153 SDValue Addr;
8154 switch (Model) {
8155 case TLSModel::LocalExec:
8156 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
8157 break;
8158 case TLSModel::InitialExec:
8159 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
8160 break;
8161 case TLSModel::LocalDynamic:
8162 case TLSModel::GeneralDynamic:
8163 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
8164 : getDynamicTLSAddr(N, DAG);
8165 break;
8166 }
8167
8168 return Addr;
8169}
8170
8171// Return true if Val is equal to (setcc LHS, RHS, CC).
8172// Return false if Val is the inverse of (setcc LHS, RHS, CC).
8173// Otherwise, return std::nullopt.
8174static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
8175 ISD::CondCode CC, SDValue Val) {
8176 assert(Val->getOpcode() == ISD::SETCC);
8177 SDValue LHS2 = Val.getOperand(0);
8178 SDValue RHS2 = Val.getOperand(1);
8179 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
8180
8181 if (LHS == LHS2 && RHS == RHS2) {
8182 if (CC == CC2)
8183 return true;
8184 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8185 return false;
8186 } else if (LHS == RHS2 && RHS == LHS2) {
8187 CC2 = ISD::getSetCCSwappedOperands(CC2);
8188 if (CC == CC2)
8189 return true;
8190 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8191 return false;
8192 }
8193
8194 return std::nullopt;
8195}
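// For example, matchSetCC(a, b, setge, (setcc a, b, setlt)) returns false
// because setge is the inverse of setlt, and swapped operands are also
// recognized: matchSetCC(b, a, setgt, (setcc a, b, setlt)) returns true.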
8196
8197 static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
8198                                     const RISCVSubtarget &Subtarget) {
8199 SDValue CondV = N->getOperand(0);
8200 SDValue TrueV = N->getOperand(1);
8201 SDValue FalseV = N->getOperand(2);
8202 MVT VT = N->getSimpleValueType(0);
8203 SDLoc DL(N);
8204
8205 if (!Subtarget.hasConditionalMoveFusion()) {
8206 // (select c, -1, y) -> -c | y
8207 if (isAllOnesConstant(TrueV)) {
8208 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8209 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
8210 }
8211 // (select c, y, -1) -> (c-1) | y
8212 if (isAllOnesConstant(FalseV)) {
8213 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8214 DAG.getAllOnesConstant(DL, VT));
8215 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
8216 }
8217
8218 // (select c, 0, y) -> (c-1) & y
8219 if (isNullConstant(TrueV)) {
8220 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8221 DAG.getAllOnesConstant(DL, VT));
8222 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
8223 }
8224 // (select c, y, 0) -> -c & y
8225 if (isNullConstant(FalseV)) {
8226 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8227 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
8228 }
8229 }
8230
8231 // select c, ~x, x --> xor -c, x
8232 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8233 const APInt &TrueVal = TrueV->getAsAPIntVal();
8234 const APInt &FalseVal = FalseV->getAsAPIntVal();
8235 if (~TrueVal == FalseVal) {
8236 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8237 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
8238 }
8239 }
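// For example, (select c, 5, -6) satisfies ~5 == -6, so it becomes
// (xor (neg c), -6): c == 1 gives -1 ^ -6 == 5 and c == 0 gives -6.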
8240
8241 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
8242 // when both truev and falsev are also setcc.
8243 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
8244 FalseV.getOpcode() == ISD::SETCC) {
8245 SDValue LHS = CondV.getOperand(0);
8246 SDValue RHS = CondV.getOperand(1);
8247 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8248
8249 // (select x, x, y) -> x | y
8250 // (select !x, x, y) -> x & y
8251 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
8252 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
8253 DAG.getFreeze(FalseV));
8254 }
8255 // (select x, y, x) -> x & y
8256 // (select !x, y, x) -> x | y
8257 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
8258 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
8259 DAG.getFreeze(TrueV), FalseV);
8260 }
8261 }
8262
8263 return SDValue();
8264}
8265
8266// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
8267// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
8268 // For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
8269// being `0` or `-1`. In such cases we can replace `select` with `and`.
8270// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
8271// than `c0`?
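// For example, (xor (select cond, x, 7), 7) folds to
// (select cond, (xor x, 7), 0), since binOp(c0, c1) == xor(7, 7) == 0, and a
// select against 0 can in turn be folded to an AND as done in
// combineSelectToBinOp above.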
8272static SDValue
8273 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
8274                                 const RISCVSubtarget &Subtarget) {
8275 if (Subtarget.hasShortForwardBranchOpt())
8276 return SDValue();
8277
8278 unsigned SelOpNo = 0;
8279 SDValue Sel = BO->getOperand(0);
8280 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
8281 SelOpNo = 1;
8282 Sel = BO->getOperand(1);
8283 }
8284
8285 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
8286 return SDValue();
8287
8288 unsigned ConstSelOpNo = 1;
8289 unsigned OtherSelOpNo = 2;
8290 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
8291 ConstSelOpNo = 2;
8292 OtherSelOpNo = 1;
8293 }
8294 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
8295 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
8296 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
8297 return SDValue();
8298
8299 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
8300 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
8301 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
8302 return SDValue();
8303
8304 SDLoc DL(Sel);
8305 EVT VT = BO->getValueType(0);
8306
8307 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
8308 if (SelOpNo == 1)
8309 std::swap(NewConstOps[0], NewConstOps[1]);
8310
8311 SDValue NewConstOp =
8312 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
8313 if (!NewConstOp)
8314 return SDValue();
8315
8316 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
8317 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
8318 return SDValue();
8319
8320 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
8321 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
8322 if (SelOpNo == 1)
8323 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
8324 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
8325
8326 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
8327 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
8328 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
8329}
8330
8331SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
8332 SDValue CondV = Op.getOperand(0);
8333 SDValue TrueV = Op.getOperand(1);
8334 SDValue FalseV = Op.getOperand(2);
8335 SDLoc DL(Op);
8336 MVT VT = Op.getSimpleValueType();
8337 MVT XLenVT = Subtarget.getXLenVT();
8338
8339 // Lower vector SELECTs to VSELECTs by splatting the condition.
8340 if (VT.isVector()) {
8341 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
8342 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
8343 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
8344 }
8345
8346 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
8347 // nodes to implement the SELECT. Performing the lowering here allows for
8348 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
8349 // sequence or RISCVISD::SELECT_CC node (branch-based select).
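// Editorial illustration (not from the upstream source): for a plain
// (select a0, a1, a2) the generic branchless fallback emitted further below is
// roughly:
//   czero.eqz a3, a1, a0   ; a3 = (a0 == 0) ? 0 : a1
//   czero.nez a4, a2, a0   ; a4 = (a0 != 0) ? 0 : a2
//   or       a0, a3, a4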
8350 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
8351 VT.isScalarInteger()) {
8352 // (select c, t, 0) -> (czero_eqz t, c)
8353 if (isNullConstant(FalseV))
8354 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
8355 // (select c, 0, f) -> (czero_nez f, c)
8356 if (isNullConstant(TrueV))
8357 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
8358
8359 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
8360 if (TrueV.getOpcode() == ISD::AND &&
8361 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
8362 return DAG.getNode(
8363 ISD::OR, DL, VT, TrueV,
8364 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8365 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
8366 if (FalseV.getOpcode() == ISD::AND &&
8367 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
8368 return DAG.getNode(
8369 ISD::OR, DL, VT, FalseV,
8370 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
8371
8372 // Try some other optimizations before falling back to generic lowering.
8373 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8374 return V;
8375
8376 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
8377 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
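    // Editorial illustration (not from the upstream source): for
    // (select c, 5, 3) both constants are equally cheap to materialize, so
    // IsCZERO_NEZ is chosen and the result is (add (czero_nez (3 - 5), c), 5),
    // i.e. 5 when c != 0 and 3 when c == 0.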
8378 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8379 const APInt &TrueVal = TrueV->getAsAPIntVal();
8380 const APInt &FalseVal = FalseV->getAsAPIntVal();
8381 const int TrueValCost = RISCVMatInt::getIntMatCost(
8382 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8383 const int FalseValCost = RISCVMatInt::getIntMatCost(
8384 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8385 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
8386 SDValue LHSVal = DAG.getConstant(
8387 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
8388 SDValue RHSVal =
8389 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
8390 SDValue CMOV =
8391          DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
8392                      DL, VT, LHSVal, CondV);
8393 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
8394 }
8395
8396 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
8397 // Unless we have the short forward branch optimization.
8398 if (!Subtarget.hasConditionalMoveFusion())
8399 return DAG.getNode(
8400 ISD::OR, DL, VT,
8401 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
8402 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8403 }
8404
8405 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8406 return V;
8407
8408 if (Op.hasOneUse()) {
8409 unsigned UseOpc = Op->user_begin()->getOpcode();
8410 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
8411 SDNode *BinOp = *Op->user_begin();
8412 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
8413 DAG, Subtarget)) {
8414 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
8415 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
8416      // may return a constant node and cause a crash in lowerSELECT.
8417 if (NewSel.getOpcode() == ISD::SELECT)
8418 return lowerSELECT(NewSel, DAG);
8419 return NewSel;
8420 }
8421 }
8422 }
8423
8424 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
8425 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
8426 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
8427 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
8428 if (FPTV && FPFV) {
8429 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
8430 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
8431 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
8432 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
8433 DAG.getConstant(1, DL, XLenVT));
8434 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
8435 }
8436 }
8437
8438 // If the condition is not an integer SETCC which operates on XLenVT, we need
8439 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
8440 // (select condv, truev, falsev)
8441 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
8442 if (CondV.getOpcode() != ISD::SETCC ||
8443 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
8444 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8445 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
8446
8447 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
8448
8449 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8450 }
8451
8452 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
8453 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
8454 // advantage of the integer compare+branch instructions. i.e.:
8455 // (select (setcc lhs, rhs, cc), truev, falsev)
8456 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
8457 SDValue LHS = CondV.getOperand(0);
8458 SDValue RHS = CondV.getOperand(1);
8459 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8460
8461  // Special case for a select of 2 constants that have a difference of 1.
8462 // Normally this is done by DAGCombine, but if the select is introduced by
8463 // type legalization or op legalization, we miss it. Restricting to SETLT
8464 // case for now because that is what signed saturating add/sub need.
8465 // FIXME: We don't need the condition to be SETLT or even a SETCC,
8466 // but we would probably want to swap the true/false values if the condition
8467 // is SETGE/SETLE to avoid an XORI.
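  // Editorial illustration (not from the upstream source): for
  // (select (setlt x, y), 6, 5) we have TrueVal - 1 == FalseVal, so the select
  // becomes (add (setlt x, y), 5), yielding 6 when the compare is true and 5
  // otherwise.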
8468 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
8469 CCVal == ISD::SETLT) {
8470 const APInt &TrueVal = TrueV->getAsAPIntVal();
8471 const APInt &FalseVal = FalseV->getAsAPIntVal();
8472 if (TrueVal - 1 == FalseVal)
8473 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
8474 if (TrueVal + 1 == FalseVal)
8475 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
8476 }
8477
8478 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8479 // 1 < x ? x : 1 -> 0 < x ? x : 1
8480 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
8481 RHS == TrueV && LHS == FalseV) {
8482 LHS = DAG.getConstant(0, DL, VT);
8483 // 0 <u x is the same as x != 0.
8484 if (CCVal == ISD::SETULT) {
8485 std::swap(LHS, RHS);
8486 CCVal = ISD::SETNE;
8487 }
8488 }
8489
8490 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
8491 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
8492 RHS == FalseV) {
8493 RHS = DAG.getConstant(0, DL, VT);
8494 }
8495
8496 SDValue TargetCC = DAG.getCondCode(CCVal);
8497
8498 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
8499 // (select (setcc lhs, rhs, CC), constant, falsev)
8500 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
8501 std::swap(TrueV, FalseV);
8502 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
8503 }
8504
8505 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
8506 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8507}
8508
8509SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
8510 SDValue CondV = Op.getOperand(1);
8511 SDLoc DL(Op);
8512 MVT XLenVT = Subtarget.getXLenVT();
8513
8514 if (CondV.getOpcode() == ISD::SETCC &&
8515 CondV.getOperand(0).getValueType() == XLenVT) {
8516 SDValue LHS = CondV.getOperand(0);
8517 SDValue RHS = CondV.getOperand(1);
8518 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8519
8520 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8521
8522 SDValue TargetCC = DAG.getCondCode(CCVal);
8523 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8524 LHS, RHS, TargetCC, Op.getOperand(2));
8525 }
8526
8527 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8528 CondV, DAG.getConstant(0, DL, XLenVT),
8529 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
8530}
8531
8532SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
8533  MachineFunction &MF = DAG.getMachineFunction();
8534  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
8535
8536 SDLoc DL(Op);
8537 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
8538                                 getPointerTy(MF.getDataLayout()));
8539
8540 // vastart just stores the address of the VarArgsFrameIndex slot into the
8541 // memory location argument.
8542 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8543 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
8544 MachinePointerInfo(SV));
8545}
8546
8547SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
8548 SelectionDAG &DAG) const {
8549 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8550  MachineFunction &MF = DAG.getMachineFunction();
8551  MachineFrameInfo &MFI = MF.getFrameInfo();
8552 MFI.setFrameAddressIsTaken(true);
8553 Register FrameReg = RI.getFrameRegister(MF);
8554 int XLenInBytes = Subtarget.getXLen() / 8;
8555
8556 EVT VT = Op.getValueType();
8557 SDLoc DL(Op);
8558 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
8559 unsigned Depth = Op.getConstantOperandVal(0);
8560 while (Depth--) {
8561 int Offset = -(XLenInBytes * 2);
8562 SDValue Ptr = DAG.getNode(
8563 ISD::ADD, DL, VT, FrameAddr,
8564        DAG.getSignedConstant(Offset, DL, VT));
8565    FrameAddr =
8566 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
8567 }
8568 return FrameAddr;
8569}
8570
8571SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
8572 SelectionDAG &DAG) const {
8573 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8574  MachineFunction &MF = DAG.getMachineFunction();
8575  MachineFrameInfo &MFI = MF.getFrameInfo();
8576 MFI.setReturnAddressIsTaken(true);
8577 MVT XLenVT = Subtarget.getXLenVT();
8578 int XLenInBytes = Subtarget.getXLen() / 8;
8579
8580  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
8581    return SDValue();
8582
8583 EVT VT = Op.getValueType();
8584 SDLoc DL(Op);
8585 unsigned Depth = Op.getConstantOperandVal(0);
8586 if (Depth) {
8587 int Off = -XLenInBytes;
8588 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
8589 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
8590 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
8591 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
8592                       MachinePointerInfo());
8593  }
8594
8595 // Return the value of the return address register, marking it an implicit
8596 // live-in.
8597 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
8598 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
8599}
8600
8601SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
8602 SelectionDAG &DAG) const {
8603 SDLoc DL(Op);
8604 SDValue Lo = Op.getOperand(0);
8605 SDValue Hi = Op.getOperand(1);
8606 SDValue Shamt = Op.getOperand(2);
8607 EVT VT = Lo.getValueType();
8608
8609 // if Shamt-XLEN < 0: // Shamt < XLEN
8610 // Lo = Lo << Shamt
8611 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8612 // else:
8613 // Lo = 0
8614 // Hi = Lo << (Shamt-XLEN)
8615
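  // Editorial illustration (not from the upstream source), assuming XLEN = 32:
  // for Shamt = 4 the true arm gives Lo = Lo << 4 and
  // Hi = (Hi << 4) | (Lo >> 28); for Shamt = 40 the false arm gives Lo = 0 and
  // Hi = Lo << 8.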
8616 SDValue Zero = DAG.getConstant(0, DL, VT);
8617 SDValue One = DAG.getConstant(1, DL, VT);
8618 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8619 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8620 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8621 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8622
8623 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
8624 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
8625 SDValue ShiftRightLo =
8626 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8627 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
8628 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
8629 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
8630
8631 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8632
8633 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
8634 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8635
8636 SDValue Parts[2] = {Lo, Hi};
8637 return DAG.getMergeValues(Parts, DL);
8638}
8639
8640SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8641 bool IsSRA) const {
8642 SDLoc DL(Op);
8643 SDValue Lo = Op.getOperand(0);
8644 SDValue Hi = Op.getOperand(1);
8645 SDValue Shamt = Op.getOperand(2);
8646 EVT VT = Lo.getValueType();
8647
8648 // SRA expansion:
8649 // if Shamt-XLEN < 0: // Shamt < XLEN
8650 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8651 // Hi = Hi >>s Shamt
8652 // else:
8653 // Lo = Hi >>s (Shamt-XLEN);
8654 // Hi = Hi >>s (XLEN-1)
8655 //
8656 // SRL expansion:
8657 // if Shamt-XLEN < 0: // Shamt < XLEN
8658 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8659 // Hi = Hi >>u Shamt
8660 // else:
8661 // Lo = Hi >>u (Shamt-XLEN);
8662 // Hi = 0;
8663
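  // Editorial illustration (not from the upstream source), assuming XLEN = 32:
  // for Shamt = 4 the true arm gives Lo = (Lo >> 4) | (Hi << 28) and
  // Hi = Hi >> 4 (arithmetic or logical); for an SRA with Shamt = 40 the false
  // arm gives Lo = Hi >>s 8 and Hi = Hi >>s 31.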
8664 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8665
8666 SDValue Zero = DAG.getConstant(0, DL, VT);
8667 SDValue One = DAG.getConstant(1, DL, VT);
8668 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8669 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8670 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8671 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8672
8673 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8674 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8675 SDValue ShiftLeftHi =
8676 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8677 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8678 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8679 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8680 SDValue HiFalse =
8681 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8682
8683 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8684
8685 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8686 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8687
8688 SDValue Parts[2] = {Lo, Hi};
8689 return DAG.getMergeValues(Parts, DL);
8690}
8691
8692// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8693// legal equivalently-sized i8 type, so we can use that as a go-between.
8694SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8695 SelectionDAG &DAG) const {
8696 SDLoc DL(Op);
8697 MVT VT = Op.getSimpleValueType();
8698 SDValue SplatVal = Op.getOperand(0);
8699 // All-zeros or all-ones splats are handled specially.
8700 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8701 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8702 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8703 }
8704 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8705 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8706 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8707 }
8708 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8709 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8710 DAG.getConstant(1, DL, SplatVal.getValueType()));
8711 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8712 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8713 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8714}
8715
8716// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8717// illegal (currently only vXi64 RV32).
8718// FIXME: We could also catch non-constant sign-extended i32 values and lower
8719// them to VMV_V_X_VL.
8720SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8721 SelectionDAG &DAG) const {
8722 SDLoc DL(Op);
8723 MVT VecVT = Op.getSimpleValueType();
8724 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8725 "Unexpected SPLAT_VECTOR_PARTS lowering");
8726
8727 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8728 SDValue Lo = Op.getOperand(0);
8729 SDValue Hi = Op.getOperand(1);
8730
8731 MVT ContainerVT = VecVT;
8732 if (VecVT.isFixedLengthVector())
8733 ContainerVT = getContainerForFixedLengthVector(VecVT);
8734
8735 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8736
8737 SDValue Res =
8738 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8739
8740 if (VecVT.isFixedLengthVector())
8741 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8742
8743 return Res;
8744}
8745
8746// Custom-lower extensions from mask vectors by using a vselect either with 1
8747// for zero/any-extension or -1 for sign-extension:
8748// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8749// Note that any-extension is lowered identically to zero-extension.
8750SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8751 int64_t ExtTrueVal) const {
8752 SDLoc DL(Op);
8753 MVT VecVT = Op.getSimpleValueType();
8754 SDValue Src = Op.getOperand(0);
8755 // Only custom-lower extensions from mask types
8756 assert(Src.getValueType().isVector() &&
8757 Src.getValueType().getVectorElementType() == MVT::i1);
8758
8759 if (VecVT.isScalableVector()) {
8760 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8761 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
8762 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8763 }
8764
8765 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8766 MVT I1ContainerVT =
8767 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8768
8769 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8770
8771 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8772
8773 MVT XLenVT = Subtarget.getXLenVT();
8774 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8775 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
8776
8777 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8778 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8779 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8780 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8781 SDValue Select =
8782 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8783 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8784
8785 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8786}
8787
8788SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8789 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8790 MVT ExtVT = Op.getSimpleValueType();
8791 // Only custom-lower extensions from fixed-length vector types.
8792 if (!ExtVT.isFixedLengthVector())
8793 return Op;
8794 MVT VT = Op.getOperand(0).getSimpleValueType();
8795 // Grab the canonical container type for the extended type. Infer the smaller
8796 // type from that to ensure the same number of vector elements, as we know
8797 // the LMUL will be sufficient to hold the smaller type.
8798 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8799 // Get the extended container type manually to ensure the same number of
8800 // vector elements between source and dest.
8801 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8802 ContainerExtVT.getVectorElementCount());
8803
8804 SDValue Op1 =
8805 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8806
8807 SDLoc DL(Op);
8808 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8809
8810 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8811
8812 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8813}
8814
8815// Custom-lower truncations from vectors to mask vectors by using a mask and a
8816// setcc operation:
8817// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8818SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8819 SelectionDAG &DAG) const {
8820 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8821 SDLoc DL(Op);
8822 EVT MaskVT = Op.getValueType();
8823 // Only expect to custom-lower truncations to mask types
8824 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8825 "Unexpected type for vector mask lowering");
8826 SDValue Src = Op.getOperand(0);
8827 MVT VecVT = Src.getSimpleValueType();
8828 SDValue Mask, VL;
8829 if (IsVPTrunc) {
8830 Mask = Op.getOperand(1);
8831 VL = Op.getOperand(2);
8832 }
8833 // If this is a fixed vector, we need to convert it to a scalable vector.
8834 MVT ContainerVT = VecVT;
8835
8836 if (VecVT.isFixedLengthVector()) {
8837 ContainerVT = getContainerForFixedLengthVector(VecVT);
8838 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8839 if (IsVPTrunc) {
8840 MVT MaskContainerVT =
8841 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8842 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8843 }
8844 }
8845
8846 if (!IsVPTrunc) {
8847 std::tie(Mask, VL) =
8848 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8849 }
8850
8851 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8852 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8853
8854 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8855 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8856 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8857 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8858
8859 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8860 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8861 DAG.getUNDEF(ContainerVT), Mask, VL);
8862 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8863 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8864 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8865 if (MaskVT.isFixedLengthVector())
8866 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8867 return Trunc;
8868}
8869
8870SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8871 SelectionDAG &DAG) const {
8872 unsigned Opc = Op.getOpcode();
8873 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8874 SDLoc DL(Op);
8875
8876 MVT VT = Op.getSimpleValueType();
8877 // Only custom-lower vector truncates
8878 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8879
8880 // Truncates to mask types are handled differently
8881 if (VT.getVectorElementType() == MVT::i1)
8882 return lowerVectorMaskTruncLike(Op, DAG);
8883
8884 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8885 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8886 // truncate by one power of two at a time.
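  // Editorial illustration (not from the upstream source): truncating nxv2i64
  // to nxv2i8 is emitted as three truncate-vector steps,
  // i64 -> i32 -> i16 -> i8, one halving of SEW per step.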
8887 MVT DstEltVT = VT.getVectorElementType();
8888
8889 SDValue Src = Op.getOperand(0);
8890 MVT SrcVT = Src.getSimpleValueType();
8891 MVT SrcEltVT = SrcVT.getVectorElementType();
8892
8893 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8894 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8895 "Unexpected vector truncate lowering");
8896
8897 MVT ContainerVT = SrcVT;
8898 SDValue Mask, VL;
8899 if (IsVPTrunc) {
8900 Mask = Op.getOperand(1);
8901 VL = Op.getOperand(2);
8902 }
8903 if (SrcVT.isFixedLengthVector()) {
8904 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8905 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8906 if (IsVPTrunc) {
8907 MVT MaskVT = getMaskTypeFor(ContainerVT);
8908 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8909 }
8910 }
8911
8912 SDValue Result = Src;
8913 if (!IsVPTrunc) {
8914 std::tie(Mask, VL) =
8915 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8916 }
8917
8918 unsigned NewOpc;
8919  if (Opc == ISD::TRUNCATE_SSAT_S)
8920    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8921  else if (Opc == ISD::TRUNCATE_USAT_U)
8922    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8923  else
8924    NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
8925
8926 do {
8927 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8928 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8929 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
8930 } while (SrcEltVT != DstEltVT);
8931
8932 if (SrcVT.isFixedLengthVector())
8933 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8934
8935 return Result;
8936}
8937
8938SDValue
8939RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8940 SelectionDAG &DAG) const {
8941 SDLoc DL(Op);
8942 SDValue Chain = Op.getOperand(0);
8943 SDValue Src = Op.getOperand(1);
8944 MVT VT = Op.getSimpleValueType();
8945 MVT SrcVT = Src.getSimpleValueType();
8946 MVT ContainerVT = VT;
8947 if (VT.isFixedLengthVector()) {
8948 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8949 ContainerVT =
8950 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8951 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8952 }
8953
8954 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8955
8956  // RVV can only widen/truncate fp to types double/half the size of the source.
8957 if ((VT.getVectorElementType() == MVT::f64 &&
8958 (SrcVT.getVectorElementType() == MVT::f16 ||
8959 SrcVT.getVectorElementType() == MVT::bf16)) ||
8960 ((VT.getVectorElementType() == MVT::f16 ||
8961 VT.getVectorElementType() == MVT::bf16) &&
8962 SrcVT.getVectorElementType() == MVT::f64)) {
8963 // For double rounding, the intermediate rounding should be round-to-odd.
8964 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8965                                 ? RISCVISD::STRICT_FP_EXTEND_VL
8966                                 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8967    MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8968 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8969 Chain, Src, Mask, VL);
8970 Chain = Src.getValue(1);
8971 }
8972
8973 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8974                         ? RISCVISD::STRICT_FP_EXTEND_VL
8975                         : RISCVISD::STRICT_FP_ROUND_VL;
8976  SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8977 Chain, Src, Mask, VL);
8978 if (VT.isFixedLengthVector()) {
8979    // StrictFP operations have two result values. Their lowered result should
8980    // have the same result count.
8981 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8982 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8983 }
8984 return Res;
8985}
8986
8987SDValue
8988RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8989 SelectionDAG &DAG) const {
8990 bool IsVP =
8991 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8992 bool IsExtend =
8993 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8994  // RVV can only truncate fp to types half the size of the source. We
8995 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8996 // conversion instruction.
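  // Editorial illustration (not from the upstream source): an f64 -> f16 round
  // is split into a round-to-odd narrowing to f32 followed by an ordinary
  // narrowing to f16, which avoids double-rounding errors.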
8997 SDLoc DL(Op);
8998 MVT VT = Op.getSimpleValueType();
8999
9000 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
9001
9002 SDValue Src = Op.getOperand(0);
9003 MVT SrcVT = Src.getSimpleValueType();
9004
9005 bool IsDirectExtend =
9006 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
9007 (SrcVT.getVectorElementType() != MVT::f16 &&
9008 SrcVT.getVectorElementType() != MVT::bf16));
9009 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
9010 VT.getVectorElementType() != MVT::bf16) ||
9011 SrcVT.getVectorElementType() != MVT::f64);
9012
9013 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
9014
9015 // Prepare any fixed-length vector operands.
9016 MVT ContainerVT = VT;
9017 SDValue Mask, VL;
9018 if (IsVP) {
9019 Mask = Op.getOperand(1);
9020 VL = Op.getOperand(2);
9021 }
9022 if (VT.isFixedLengthVector()) {
9023 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9024 ContainerVT =
9025 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9026 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9027 if (IsVP) {
9028 MVT MaskVT = getMaskTypeFor(ContainerVT);
9029 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9030 }
9031 }
9032
9033 if (!IsVP)
9034 std::tie(Mask, VL) =
9035 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9036
9037 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
9038
9039 if (IsDirectConv) {
9040 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
9041 if (VT.isFixedLengthVector())
9042 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
9043 return Src;
9044 }
9045
9046 unsigned InterConvOpc =
9047      IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
9048
9049 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9050 SDValue IntermediateConv =
9051 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
9052 SDValue Result =
9053 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
9054 if (VT.isFixedLengthVector())
9055 return convertFromScalableVector(VT, Result, DAG, Subtarget);
9056 return Result;
9057}
9058
9059// Given a scalable vector type and an index into it, returns the type for the
9060// smallest subvector that the index fits in. This can be used to reduce LMUL
9061// for operations like vslidedown.
9062//
9063// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
9064static std::optional<MVT>
9065getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
9066 const RISCVSubtarget &Subtarget) {
9067 assert(VecVT.isScalableVector());
9068 const unsigned EltSize = VecVT.getScalarSizeInBits();
9069 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
9070 const unsigned MinVLMAX = VectorBitsMin / EltSize;
9071 MVT SmallerVT;
9072 if (MaxIdx < MinVLMAX)
9073 SmallerVT = getLMUL1VT(VecVT);
9074 else if (MaxIdx < MinVLMAX * 2)
9075 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
9076 else if (MaxIdx < MinVLMAX * 4)
9077 SmallerVT = getLMUL1VT(VecVT)
9078                    .getDoubleNumVectorElementsVT()
9079                    .getDoubleNumVectorElementsVT();
9080  if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
9081 return std::nullopt;
9082 return SmallerVT;
9083}
9084
9085// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
9086// first position of a vector, and that vector is slid up to the insert index.
9087// By limiting the active vector length to index+1 and merging with the
9088// original vector (with an undisturbed tail policy for elements >= VL), we
9089// achieve the desired result of leaving all elements untouched except the one
9090// at VL-1, which is replaced with the desired value.
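// Editorial illustration (not from the upstream source): inserting a scalar at
// constant index 2 of a v4i32 roughly becomes a scalar move into element 0 of
// a temporary, then a vslideup by 2 with VL = 3 and a tail-undisturbed policy,
// so elements 3 and up keep their original values.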
9091SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
9092 SelectionDAG &DAG) const {
9093 SDLoc DL(Op);
9094 MVT VecVT = Op.getSimpleValueType();
9095 MVT XLenVT = Subtarget.getXLenVT();
9096 SDValue Vec = Op.getOperand(0);
9097 SDValue Val = Op.getOperand(1);
9098 MVT ValVT = Val.getSimpleValueType();
9099 SDValue Idx = Op.getOperand(2);
9100
9101 if (VecVT.getVectorElementType() == MVT::i1) {
9102 // FIXME: For now we just promote to an i8 vector and insert into that,
9103 // but this is probably not optimal.
9104 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9105 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9106 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
9107 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
9108 }
9109
9110 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9111 ValVT == MVT::bf16) {
9112 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9113 MVT IntVT = VecVT.changeTypeToInteger();
9114 SDValue IntInsert = DAG.getNode(
9115 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
9116 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
9117 return DAG.getBitcast(VecVT, IntInsert);
9118 }
9119
9120 MVT ContainerVT = VecVT;
9121 // If the operand is a fixed-length vector, convert to a scalable one.
9122 if (VecVT.isFixedLengthVector()) {
9123 ContainerVT = getContainerForFixedLengthVector(VecVT);
9124 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9125 }
9126
9127 // If we know the index we're going to insert at, we can shrink Vec so that
9128 // we're performing the scalar inserts and slideup on a smaller LMUL.
9129 MVT OrigContainerVT = ContainerVT;
9130 SDValue OrigVec = Vec;
9131 SDValue AlignedIdx;
9132 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
9133 const unsigned OrigIdx = IdxC->getZExtValue();
9134 // Do we know an upper bound on LMUL?
9135 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
9136 DL, DAG, Subtarget)) {
9137 ContainerVT = *ShrunkVT;
9138 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
9139 }
9140
9141 // If we're compiling for an exact VLEN value, we can always perform
9142 // the insert in m1 as we can determine the register corresponding to
9143 // the index in the register group.
9144 const MVT M1VT = getLMUL1VT(ContainerVT);
9145 if (auto VLEN = Subtarget.getRealVLen();
9146 VLEN && ContainerVT.bitsGT(M1VT)) {
9147 EVT ElemVT = VecVT.getVectorElementType();
9148 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9149 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9150 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9151 unsigned ExtractIdx =
9152 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9153 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
9154 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9155 ContainerVT = M1VT;
9156 }
9157
9158 if (AlignedIdx)
9159 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9160 AlignedIdx);
9161 }
9162
9163 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9164 // Even i64-element vectors on RV32 can be lowered without scalar
9165 // legalization if the most-significant 32 bits of the value are not affected
9166 // by the sign-extension of the lower 32 bits.
9167 // TODO: We could also catch sign extensions of a 32-bit value.
9168 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9169 const auto *CVal = cast<ConstantSDNode>(Val);
9170 if (isInt<32>(CVal->getSExtValue())) {
9171 IsLegalInsert = true;
9172 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
9173 }
9174 }
9175
9176 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9177
9178 SDValue ValInVec;
9179
9180 if (IsLegalInsert) {
9181 unsigned Opc =
9182        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
9183    if (isNullConstant(Idx)) {
9184 if (!VecVT.isFloatingPoint())
9185 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
9186 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
9187
9188 if (AlignedIdx)
9189 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9190 Vec, AlignedIdx);
9191 if (!VecVT.isFixedLengthVector())
9192 return Vec;
9193 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
9194 }
9195 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
9196 } else {
9197 // On RV32, i64-element vectors must be specially handled to place the
9198 // value at element 0, by using two vslide1down instructions in sequence on
9199 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
9200 // this.
9201 SDValue ValLo, ValHi;
9202 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
9203 MVT I32ContainerVT =
9204 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
9205 SDValue I32Mask =
9206 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
9207 // Limit the active VL to two.
9208 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
9209 // If the Idx is 0 we can insert directly into the vector.
9210 if (isNullConstant(Idx)) {
9211 // First slide in the lo value, then the hi in above it. We use slide1down
9212 // to avoid the register group overlap constraint of vslide1up.
9213 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9214 Vec, Vec, ValLo, I32Mask, InsertI64VL);
9215 // If the source vector is undef don't pass along the tail elements from
9216 // the previous slide1down.
9217 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
9218 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9219 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
9220 // Bitcast back to the right container type.
9221 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9222
9223 if (AlignedIdx)
9224 ValInVec =
9225 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9226 ValInVec, AlignedIdx);
9227 if (!VecVT.isFixedLengthVector())
9228 return ValInVec;
9229 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
9230 }
9231
9232 // First slide in the lo value, then the hi in above it. We use slide1down
9233 // to avoid the register group overlap constraint of vslide1up.
9234 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9235 DAG.getUNDEF(I32ContainerVT),
9236 DAG.getUNDEF(I32ContainerVT), ValLo,
9237 I32Mask, InsertI64VL);
9238 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9239 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
9240 I32Mask, InsertI64VL);
9241 // Bitcast back to the right container type.
9242 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9243 }
9244
9245 // Now that the value is in a vector, slide it into position.
9246 SDValue InsertVL =
9247 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
9248
9249 // Use tail agnostic policy if Idx is the last index of Vec.
9250  unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9251  if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
9252 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
9253 Policy = RISCVII::TAIL_AGNOSTIC;
9254 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
9255 Idx, Mask, InsertVL, Policy);
9256
9257 if (AlignedIdx)
9258 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9259 Slideup, AlignedIdx);
9260 if (!VecVT.isFixedLengthVector())
9261 return Slideup;
9262 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
9263}
9264
9265// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
9266// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
9267// types this is done using VMV_X_S to allow us to glean information about the
9268// sign bits of the result.
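// Editorial illustration (not from the upstream source): an integer
// (extractelt vec, 5) roughly becomes a vslidedown by 5 with VL = 1 followed
// by vmv.x.s; floating-point extracts instead use vfmv.f.s via the TableGen
// patterns mentioned below.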
9269SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
9270 SelectionDAG &DAG) const {
9271 SDLoc DL(Op);
9272 SDValue Idx = Op.getOperand(1);
9273 SDValue Vec = Op.getOperand(0);
9274 EVT EltVT = Op.getValueType();
9275 MVT VecVT = Vec.getSimpleValueType();
9276 MVT XLenVT = Subtarget.getXLenVT();
9277
9278 if (VecVT.getVectorElementType() == MVT::i1) {
9279 // Use vfirst.m to extract the first bit.
9280 if (isNullConstant(Idx)) {
9281 MVT ContainerVT = VecVT;
9282 if (VecVT.isFixedLengthVector()) {
9283 ContainerVT = getContainerForFixedLengthVector(VecVT);
9284 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9285 }
9286 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9287 SDValue Vfirst =
9288 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
9289 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
9290 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9291 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9292 }
9293 if (VecVT.isFixedLengthVector()) {
9294 unsigned NumElts = VecVT.getVectorNumElements();
9295 if (NumElts >= 8) {
9296 MVT WideEltVT;
9297 unsigned WidenVecLen;
9298 SDValue ExtractElementIdx;
9299 SDValue ExtractBitIdx;
9300 unsigned MaxEEW = Subtarget.getELen();
9301 MVT LargestEltVT = MVT::getIntegerVT(
9302 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
9303 if (NumElts <= LargestEltVT.getSizeInBits()) {
9304 assert(isPowerOf2_32(NumElts) &&
9305 "the number of elements should be power of 2");
9306 WideEltVT = MVT::getIntegerVT(NumElts);
9307 WidenVecLen = 1;
9308 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
9309 ExtractBitIdx = Idx;
9310 } else {
9311 WideEltVT = LargestEltVT;
9312 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
9313 // extract element index = index / element width
9314 ExtractElementIdx = DAG.getNode(
9315 ISD::SRL, DL, XLenVT, Idx,
9316 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
9317 // mask bit index = index % element width
9318 ExtractBitIdx = DAG.getNode(
9319 ISD::AND, DL, XLenVT, Idx,
9320 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
9321 }
9322 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
9323 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
9324 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
9325 Vec, ExtractElementIdx);
9326 // Extract the bit from GPR.
9327 SDValue ShiftRight =
9328 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
9329 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
9330 DAG.getConstant(1, DL, XLenVT));
9331 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9332 }
9333 }
9334 // Otherwise, promote to an i8 vector and extract from that.
9335 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9336 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9337 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
9338 }
9339
9340 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9341 EltVT == MVT::bf16) {
9342 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
9343 MVT IntVT = VecVT.changeTypeToInteger();
9344 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
9345 SDValue IntExtract =
9346 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
9347 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
9348 }
9349
9350 // If this is a fixed vector, we need to convert it to a scalable vector.
9351 MVT ContainerVT = VecVT;
9352 if (VecVT.isFixedLengthVector()) {
9353 ContainerVT = getContainerForFixedLengthVector(VecVT);
9354 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9355 }
9356
9357 // If we're compiling for an exact VLEN value and we have a known
9358 // constant index, we can always perform the extract in m1 (or
9359 // smaller) as we can determine the register corresponding to
9360 // the index in the register group.
9361 const auto VLen = Subtarget.getRealVLen();
9362 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9363 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9364 MVT M1VT = getLMUL1VT(ContainerVT);
9365 unsigned OrigIdx = IdxC->getZExtValue();
9366 EVT ElemVT = VecVT.getVectorElementType();
9367 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9368 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9369 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9370 unsigned ExtractIdx =
9371 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9372 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
9373 DAG.getVectorIdxConstant(ExtractIdx, DL));
9374 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9375 ContainerVT = M1VT;
9376 }
9377
9378 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9379 // contains our index.
9380 std::optional<uint64_t> MaxIdx;
9381 if (VecVT.isFixedLengthVector())
9382 MaxIdx = VecVT.getVectorNumElements() - 1;
9383 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9384 MaxIdx = IdxC->getZExtValue();
9385 if (MaxIdx) {
9386 if (auto SmallerVT =
9387 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
9388 ContainerVT = *SmallerVT;
9389 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9390 DAG.getConstant(0, DL, XLenVT));
9391 }
9392 }
9393
9394 // If after narrowing, the required slide is still greater than LMUL2,
9395 // fallback to generic expansion and go through the stack. This is done
9396 // for a subtle reason: extracting *all* elements out of a vector is
9397 // widely expected to be linear in vector size, but because vslidedown
9398 // is linear in LMUL, performing N extracts using vslidedown becomes
9399 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
9400 // seems to have the same problem (the store is linear in LMUL), but the
9401 // generic expansion *memoizes* the store, and thus for many extracts of
9402 // the same vector we end up with one store and a bunch of loads.
9403 // TODO: We don't have the same code for insert_vector_elt because we
9404 // have BUILD_VECTOR and handle the degenerate case there. Should we
9405 // consider adding an inverse BUILD_VECTOR node?
9406 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9407 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9408 return SDValue();
9409
9410 // If the index is 0, the vector is already in the right position.
9411 if (!isNullConstant(Idx)) {
9412 // Use a VL of 1 to avoid processing more elements than we need.
9413 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
9414 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9415 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
9416 }
9417
9418 if (!EltVT.isInteger()) {
9419 // Floating-point extracts are handled in TableGen.
9420 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
9421 DAG.getVectorIdxConstant(0, DL));
9422 }
9423
9424 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
9425 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
9426}
9427
9428// Some RVV intrinsics may claim that they want an integer operand to be
9429// promoted or expanded.
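// Editorial illustration (not from the upstream source): on RV64 an i32 scalar
// operand of, e.g., a .vx intrinsic is sign- or any-extended to i64 (XLenVT)
// below; on RV32 with SEW=64 the i64 scalar is truncated if it is known to be
// sign-extended, split and splatted, or handled by the vslide1up/vslide1down
// special case.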
9430static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
9431                                           const RISCVSubtarget &Subtarget) {
9432 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
9433 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
9434 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
9435 "Unexpected opcode");
9436
9437 if (!Subtarget.hasVInstructions())
9438 return SDValue();
9439
9440 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9441 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9442 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9443
9444 SDLoc DL(Op);
9445
9446  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9447      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9448 if (!II || !II->hasScalarOperand())
9449 return SDValue();
9450
9451 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
9452 assert(SplatOp < Op.getNumOperands());
9453
9454  SmallVector<SDValue, 8> Operands(Op->ops());
9455  SDValue &ScalarOp = Operands[SplatOp];
9456 MVT OpVT = ScalarOp.getSimpleValueType();
9457 MVT XLenVT = Subtarget.getXLenVT();
9458
9459 // If this isn't a scalar, or its type is XLenVT we're done.
9460 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9461 return SDValue();
9462
9463 // Simplest case is that the operand needs to be promoted to XLenVT.
9464 if (OpVT.bitsLT(XLenVT)) {
9465 // If the operand is a constant, sign extend to increase our chances
9466 // of being able to use a .vi instruction. ANY_EXTEND would become a
9467    // zero extend and the simm5 check in isel would fail.
9468 // FIXME: Should we ignore the upper bits in isel instead?
9469 unsigned ExtOpc =
9470 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9471 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9472 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9473 }
9474
9475 // Use the previous operand to get the vXi64 VT. The result might be a mask
9476 // VT for compares. Using the previous operand assumes that the previous
9477 // operand will never have a smaller element size than a scalar operand and
9478 // that a widening operation never uses SEW=64.
9479 // NOTE: If this fails the below assert, we can probably just find the
9480 // element count from any operand or result and use it to construct the VT.
9481 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
9482 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
9483
9484 // The more complex case is when the scalar is larger than XLenVT.
9485 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9486 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
9487
9488 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
9489 // instruction to sign-extend since SEW>XLEN.
9490 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9491 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
9492 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9493 }
9494
9495 switch (IntNo) {
9496 case Intrinsic::riscv_vslide1up:
9497 case Intrinsic::riscv_vslide1down:
9498 case Intrinsic::riscv_vslide1up_mask:
9499 case Intrinsic::riscv_vslide1down_mask: {
9500 // We need to special case these when the scalar is larger than XLen.
9501 unsigned NumOps = Op.getNumOperands();
9502 bool IsMasked = NumOps == 7;
9503
9504 // Convert the vector source to the equivalent nxvXi32 vector.
9505 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9506 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
9507 SDValue ScalarLo, ScalarHi;
9508 std::tie(ScalarLo, ScalarHi) =
9509 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
9510
9511 // Double the VL since we halved SEW.
9512 SDValue AVL = getVLOperand(Op);
9513 SDValue I32VL;
9514
9515 // Optimize for constant AVL
9516 if (isa<ConstantSDNode>(AVL)) {
9517 const auto [MinVLMAX, MaxVLMAX] =
9518          RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
9519
9520 uint64_t AVLInt = AVL->getAsZExtVal();
9521 if (AVLInt <= MinVLMAX) {
9522 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
9523 } else if (AVLInt >= 2 * MaxVLMAX) {
9524 // Just set vl to VLMAX in this situation
9525 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9526 } else {
9527 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
9528 // is related to the hardware implementation.
9529        // So let the following code handle it.
9530 }
9531 }
9532 if (!I32VL) {
9533      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
9534      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
9535 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9536 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
9537 SDValue SETVL =
9538 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
9539      // Use the vsetvli instruction to get the actually used length, which is
9540      // related to the hardware implementation.
9541 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
9542 SEW, LMUL);
9543 I32VL =
9544 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
9545 }
9546
9547 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
9548
9549 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
9550 // instructions.
9551 SDValue Passthru;
9552 if (IsMasked)
9553 Passthru = DAG.getUNDEF(I32VT);
9554 else
9555 Passthru = DAG.getBitcast(I32VT, Operands[1]);
9556
9557 if (IntNo == Intrinsic::riscv_vslide1up ||
9558 IntNo == Intrinsic::riscv_vslide1up_mask) {
9559 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9560 ScalarHi, I32Mask, I32VL);
9561 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9562 ScalarLo, I32Mask, I32VL);
9563 } else {
9564 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9565 ScalarLo, I32Mask, I32VL);
9566 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9567 ScalarHi, I32Mask, I32VL);
9568 }
9569
9570 // Convert back to nxvXi64.
9571 Vec = DAG.getBitcast(VT, Vec);
9572
9573 if (!IsMasked)
9574 return Vec;
9575 // Apply mask after the operation.
9576 SDValue Mask = Operands[NumOps - 3];
9577 SDValue MaskedOff = Operands[1];
9578 // Assume Policy operand is the last operand.
9579 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
9580 // We don't need to select maskedoff if it's undef.
9581 if (MaskedOff.isUndef())
9582 return Vec;
9583 // TAMU
9584 if (Policy == RISCVII::TAIL_AGNOSTIC)
9585 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9586 DAG.getUNDEF(VT), AVL);
9587 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
9588  // It's fine because vmerge does not care about the mask policy.
9589 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9590 MaskedOff, AVL);
9591 }
9592 }
9593
9594 // We need to convert the scalar to a splat vector.
9595 SDValue VL = getVLOperand(Op);
9596 assert(VL.getValueType() == XLenVT);
9597 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
9598 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9599}
9600
9601// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9602// scalable vector llvm.get.vector.length for now.
9603//
9604// We need to convert from a scalable VF to a vsetvli with VLMax equal to
9605// (vscale * VF). The vscale and VF are independent of element width. We use
9606// SEW=8 for the vsetvli because it is the only element width that supports all
9607// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
9608// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
9609// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9610// SEW and LMUL are better for the surrounding vector instructions.
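// Editorial illustration (not from the upstream source): with
// RVVBitsPerBlock = 64, a request for VF = 4 gives LMul1VF = 8, so a
// fractional LMUL of mf2 is used; "vsetvli ..., e8, mf2" then yields
// VLMax = (VLEN/8) * (1/2) = vscale * 4 as required.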
9611static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
9612                                    const RISCVSubtarget &Subtarget) {
9613 MVT XLenVT = Subtarget.getXLenVT();
9614
9615 // The smallest LMUL is only valid for the smallest element width.
9616 const unsigned ElementWidth = 8;
9617
9618 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
9619 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
9620 // We don't support VF==1 with ELEN==32.
9621 [[maybe_unused]] unsigned MinVF =
9622 RISCV::RVVBitsPerBlock / Subtarget.getELen();
9623
9624 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
9625 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
9626 "Unexpected VF");
9627
9628 bool Fractional = VF < LMul1VF;
9629 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9630 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9631 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
9632
9633 SDLoc DL(N);
9634
9635 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
9636 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
9637
9638 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
9639
9640 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
9641 SDValue Res =
9642 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
9643 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
9644}
9645
9646static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
9647                             const RISCVSubtarget &Subtarget) {
9648 SDValue Op0 = N->getOperand(1);
9649 MVT OpVT = Op0.getSimpleValueType();
9650 MVT ContainerVT = OpVT;
9651 if (OpVT.isFixedLengthVector()) {
9652 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9653 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9654 }
9655 MVT XLenVT = Subtarget.getXLenVT();
9656 SDLoc DL(N);
9657 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9658 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9659 if (isOneConstant(N->getOperand(2)))
9660 return Res;
9661
9662 // Convert -1 to VL.
9663 SDValue Setcc =
9664 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9665 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9666 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9667}
9668
9669static inline void promoteVCIXScalar(const SDValue &Op,
9670 SmallVectorImpl<SDValue> &Operands,
9671 SelectionDAG &DAG) {
9672 const RISCVSubtarget &Subtarget =
9673 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9674
9675 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9676 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9677 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9678 SDLoc DL(Op);
9679
9680 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9681 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9682 if (!II || !II->hasScalarOperand())
9683 return;
9684
9685 unsigned SplatOp = II->ScalarOperand + 1;
9686 assert(SplatOp < Op.getNumOperands());
9687
9688 SDValue &ScalarOp = Operands[SplatOp];
9689 MVT OpVT = ScalarOp.getSimpleValueType();
9690 MVT XLenVT = Subtarget.getXLenVT();
9691
9692 // The code below is partially copied from lowerVectorIntrinsicScalars.
9693 // If this isn't a scalar, or its type is XLenVT we're done.
9694 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9695 return;
9696
9697 // Manually emit the promote operation for the scalar operand.
9698 if (OpVT.bitsLT(XLenVT)) {
9699 unsigned ExtOpc =
9700 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9701 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9702 }
9703}
9704
9705static void processVCIXOperands(SDValue &OrigOp,
9706 SmallVectorImpl<SDValue> &Operands,
9707 SelectionDAG &DAG) {
9708 promoteVCIXScalar(OrigOp, Operands, DAG);
9709 const RISCVSubtarget &Subtarget =
9710 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9711 for (SDValue &V : Operands) {
9712 EVT ValType = V.getValueType();
9713 if (ValType.isVector() && ValType.isFloatingPoint()) {
9714 MVT InterimIVT =
9715 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9716 ValType.getVectorElementCount());
9717 V = DAG.getBitcast(InterimIVT, V);
9718 }
9719 if (ValType.isFixedLengthVector()) {
9720 MVT OpContainerVT = getContainerForFixedLengthVector(
9721 DAG, V.getSimpleValueType(), Subtarget);
9722 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9723 }
9724 }
9725}
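// Illustrative effect of the two helpers above (assumed types, not from a
// test): a fixed-length floating-point operand such as v4f32 is first bitcast
// to v4i32 and then widened to its scalable container (e.g. nxv2i32 when the
// minimum VLEN is 128), while a sub-XLen scalar operand is sign/any-extended
// to XLenVT by promoteVCIXScalar.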
9726
9727// LMUL * VLEN should be greater than or equal to EGS * SEW
9728static inline bool isValidEGW(int EGS, EVT VT,
9729 const RISCVSubtarget &Subtarget) {
9730 return (Subtarget.getRealMinVLen() *
9731 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
9732 EGS * VT.getScalarSizeInBits();
9733}
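// Worked example (illustrative): for the element-group checks below, EGS = 4
// and SEW = 32 require an effective group width of 4 * 32 = 128 bits. A type
// such as nxv1i32 spans only VLEN/2 bits per register group, so it is only
// accepted when the minimum VLEN is at least 256, whereas nxv4i32 (a two
// register group) already provides 2 * VLEN >= 128 bits for any supported
// VLEN.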
9734
9735SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9736 SelectionDAG &DAG) const {
9737 unsigned IntNo = Op.getConstantOperandVal(0);
9738 SDLoc DL(Op);
9739 MVT XLenVT = Subtarget.getXLenVT();
9740
9741 switch (IntNo) {
9742 default:
9743 break; // Don't custom lower most intrinsics.
9744 case Intrinsic::riscv_tuple_insert: {
9745 SDValue Vec = Op.getOperand(1);
9746 SDValue SubVec = Op.getOperand(2);
9747 SDValue Index = Op.getOperand(3);
9748
9749 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
9750 SubVec, Index);
9751 }
9752 case Intrinsic::riscv_tuple_extract: {
9753 SDValue Vec = Op.getOperand(1);
9754 SDValue Index = Op.getOperand(2);
9755
9756 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
9757 Index);
9758 }
9759 case Intrinsic::thread_pointer: {
9760 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9761 return DAG.getRegister(RISCV::X4, PtrVT);
9762 }
9763 case Intrinsic::riscv_orc_b:
9764 case Intrinsic::riscv_brev8:
9765 case Intrinsic::riscv_sha256sig0:
9766 case Intrinsic::riscv_sha256sig1:
9767 case Intrinsic::riscv_sha256sum0:
9768 case Intrinsic::riscv_sha256sum1:
9769 case Intrinsic::riscv_sm3p0:
9770 case Intrinsic::riscv_sm3p1: {
9771 unsigned Opc;
9772 switch (IntNo) {
9773 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9774 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9775 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9776 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9777 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9778 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9779 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9780 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9781 }
9782
9783 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9784 }
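// Illustrative mapping for the cases above (sketch): e.g. a call to
// @llvm.riscv.orc.b on XLenVT becomes RISCVISD::ORC_B, which selects to the
// Zbb 'orc.b' instruction; the sha256 and sm3 intrinsics map 1:1 onto their
// Zknh/Zksh instructions in the same way.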
9785 case Intrinsic::riscv_sm4ks:
9786 case Intrinsic::riscv_sm4ed: {
9787 unsigned Opc =
9788 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9789
9790 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9791 Op.getOperand(3));
9792 }
9793 case Intrinsic::riscv_zip:
9794 case Intrinsic::riscv_unzip: {
9795 unsigned Opc =
9796 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9797 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9798 }
9799 case Intrinsic::riscv_mopr:
9800 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9801 Op.getOperand(2));
9802
9803 case Intrinsic::riscv_moprr: {
9804 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9805 Op.getOperand(2), Op.getOperand(3));
9806 }
9807 case Intrinsic::riscv_clmul:
9808 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9809 Op.getOperand(2));
9810 case Intrinsic::riscv_clmulh:
9811 case Intrinsic::riscv_clmulr: {
9812 unsigned Opc =
9813 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9814 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9815 }
9816 case Intrinsic::experimental_get_vector_length:
9817 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9818 case Intrinsic::experimental_cttz_elts:
9819 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9820 case Intrinsic::riscv_vmv_x_s: {
9821 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9822 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9823 }
9824 case Intrinsic::riscv_vfmv_f_s:
9825 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9826 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9827 case Intrinsic::riscv_vmv_v_x:
9828 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9829 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9830 Subtarget);
9831 case Intrinsic::riscv_vfmv_v_f:
9832 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9833 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9834 case Intrinsic::riscv_vmv_s_x: {
9835 SDValue Scalar = Op.getOperand(2);
9836
9837 if (Scalar.getValueType().bitsLE(XLenVT)) {
9838 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9839 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9840 Op.getOperand(1), Scalar, Op.getOperand(3));
9841 }
9842
9843 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9844
9845 // This is an i64 value that lives in two scalar registers. We have to
9846 // insert this in a convoluted way. First we build a vXi64 splat containing
9847 // the two values, which we assemble using some bit math. Next we'll use
9848 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9849 // to merge element 0 from our splat into the source vector.
9850 // FIXME: This is probably not the best way to do this, but it is
9851 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9852 // point.
9853 // sw lo, (a0)
9854 // sw hi, 4(a0)
9855 // vlse vX, (a0)
9856 //
9857 // vid.v vVid
9858 // vmseq.vx mMask, vVid, 0
9859 // vmerge.vvm vDest, vSrc, vVal, mMask
9860 MVT VT = Op.getSimpleValueType();
9861 SDValue Vec = Op.getOperand(1);
9862 SDValue VL = getVLOperand(Op);
9863
9864 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9865 if (Op.getOperand(1).isUndef())
9866 return SplattedVal;
9867 SDValue SplattedIdx =
9868 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9869 DAG.getConstant(0, DL, MVT::i32), VL);
9870
9871 MVT MaskVT = getMaskTypeFor(VT);
9872 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9873 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9874 SDValue SelectCond =
9875 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9876 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9877 DAG.getUNDEF(MaskVT), Mask, VL});
9878 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9879 Vec, DAG.getUNDEF(VT), VL);
9880 }
9881 case Intrinsic::riscv_vfmv_s_f:
9882 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9883 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9884 // EGS * EEW >= 128 bits
9885 case Intrinsic::riscv_vaesdf_vv:
9886 case Intrinsic::riscv_vaesdf_vs:
9887 case Intrinsic::riscv_vaesdm_vv:
9888 case Intrinsic::riscv_vaesdm_vs:
9889 case Intrinsic::riscv_vaesef_vv:
9890 case Intrinsic::riscv_vaesef_vs:
9891 case Intrinsic::riscv_vaesem_vv:
9892 case Intrinsic::riscv_vaesem_vs:
9893 case Intrinsic::riscv_vaeskf1:
9894 case Intrinsic::riscv_vaeskf2:
9895 case Intrinsic::riscv_vaesz_vs:
9896 case Intrinsic::riscv_vsm4k:
9897 case Intrinsic::riscv_vsm4r_vv:
9898 case Intrinsic::riscv_vsm4r_vs: {
9899 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9900 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9901 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9902 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9903 return Op;
9904 }
9905 // EGS * EEW >= 256 bits
9906 case Intrinsic::riscv_vsm3c:
9907 case Intrinsic::riscv_vsm3me: {
9908 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9909 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9910 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9911 return Op;
9912 }
9913 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9914 case Intrinsic::riscv_vsha2ch:
9915 case Intrinsic::riscv_vsha2cl:
9916 case Intrinsic::riscv_vsha2ms: {
9917 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9918 !Subtarget.hasStdExtZvknhb())
9919 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9920 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9921 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9922 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9923 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9924 return Op;
9925 }
9926 case Intrinsic::riscv_sf_vc_v_x:
9927 case Intrinsic::riscv_sf_vc_v_i:
9928 case Intrinsic::riscv_sf_vc_v_xv:
9929 case Intrinsic::riscv_sf_vc_v_iv:
9930 case Intrinsic::riscv_sf_vc_v_vv:
9931 case Intrinsic::riscv_sf_vc_v_fv:
9932 case Intrinsic::riscv_sf_vc_v_xvv:
9933 case Intrinsic::riscv_sf_vc_v_ivv:
9934 case Intrinsic::riscv_sf_vc_v_vvv:
9935 case Intrinsic::riscv_sf_vc_v_fvv:
9936 case Intrinsic::riscv_sf_vc_v_xvw:
9937 case Intrinsic::riscv_sf_vc_v_ivw:
9938 case Intrinsic::riscv_sf_vc_v_vvw:
9939 case Intrinsic::riscv_sf_vc_v_fvw: {
9940 MVT VT = Op.getSimpleValueType();
9941
9942 SmallVector<SDValue> Operands{Op->op_values()};
9943 processVCIXOperands(Op, Operands, DAG);
9944
9945 MVT RetVT = VT;
9946 if (VT.isFixedLengthVector())
9947 RetVT = getContainerForFixedLengthVector(VT);
9948 else if (VT.isFloatingPoint())
9949 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9950 VT.getVectorElementCount());
9951
9952 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9953
9954 if (VT.isFixedLengthVector())
9955 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9956 else if (VT.isFloatingPoint())
9957 NewNode = DAG.getBitcast(VT, NewNode);
9958
9959 if (Op == NewNode)
9960 break;
9961
9962 return NewNode;
9963 }
9964 }
9965
9966 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9967}
9968
9969 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9970 unsigned Type) {
9971 SDLoc DL(Op);
9972 SmallVector<SDValue> Operands{Op->op_values()};
9973 Operands.erase(Operands.begin() + 1);
9974
9975 const RISCVSubtarget &Subtarget =
9976 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9977 MVT VT = Op.getSimpleValueType();
9978 MVT RetVT = VT;
9979 MVT FloatVT = VT;
9980
9981 if (VT.isFloatingPoint()) {
9982 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9983 VT.getVectorElementCount());
9984 FloatVT = RetVT;
9985 }
9986 if (VT.isFixedLengthVector())
9987 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9988 Subtarget);
9989
9991
9992 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9993 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9994 SDValue Chain = NewNode.getValue(1);
9995
9996 if (VT.isFixedLengthVector())
9997 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9998 if (VT.isFloatingPoint())
9999 NewNode = DAG.getBitcast(VT, NewNode);
10000
10001 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
10002
10003 return NewNode;
10004}
10005
10006 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
10007 unsigned Type) {
10008 SmallVector<SDValue> Operands{Op->op_values()};
10009 Operands.erase(Operands.begin() + 1);
10010 processVCIXOperands(Op, Operands, DAG);
10011
10012 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
10013}
10014
10015SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
10016 SelectionDAG &DAG) const {
10017 unsigned IntNo = Op.getConstantOperandVal(1);
10018 switch (IntNo) {
10019 default:
10020 break;
10021 case Intrinsic::riscv_seg2_load:
10022 case Intrinsic::riscv_seg3_load:
10023 case Intrinsic::riscv_seg4_load:
10024 case Intrinsic::riscv_seg5_load:
10025 case Intrinsic::riscv_seg6_load:
10026 case Intrinsic::riscv_seg7_load:
10027 case Intrinsic::riscv_seg8_load: {
10028 SDLoc DL(Op);
10029 static const Intrinsic::ID VlsegInts[7] = {
10030 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
10031 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
10032 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
10033 Intrinsic::riscv_vlseg8};
10034 unsigned NF = Op->getNumValues() - 1;
10035 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10036 MVT XLenVT = Subtarget.getXLenVT();
10037 MVT VT = Op->getSimpleValueType(0);
10038 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10039 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10040 ContainerVT.getScalarSizeInBits();
10041 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10042
10043 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10044 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
10045 auto *Load = cast<MemIntrinsicSDNode>(Op);
10046
10047 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
10048 SDValue Ops[] = {
10049 Load->getChain(),
10050 IntID,
10051 DAG.getUNDEF(VecTupTy),
10052 Op.getOperand(2),
10053 VL,
10054 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10055 SDValue Result =
10056 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10057 Load->getMemoryVT(), Load->getMemOperand());
10058 SmallVector<SDValue, 9> Results;
10059 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
10060 SDValue SubVec =
10061 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
10062 Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
10063 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
10064 }
10065 Results.push_back(Result.getValue(1));
10066 return DAG.getMergeValues(Results, DL);
10067 }
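// Illustrative example for the segment-load lowering above (assumed types,
// not from a test): a riscv.seg2.load returning two v4i32 values becomes a
// vlseg2e32.v of a 2-field vector tuple with VL = 4, roughly
//   vsetivli zero, 4, e32, m1, ta, ma
//   vlseg2e32.v v8, (a0)
// after which each field is pulled out with TUPLE_EXTRACT and converted back
// to the fixed-length type.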
10068 case Intrinsic::riscv_sf_vc_v_x_se:
10070 case Intrinsic::riscv_sf_vc_v_i_se:
10072 case Intrinsic::riscv_sf_vc_v_xv_se:
10074 case Intrinsic::riscv_sf_vc_v_iv_se:
10076 case Intrinsic::riscv_sf_vc_v_vv_se:
10078 case Intrinsic::riscv_sf_vc_v_fv_se:
10080 case Intrinsic::riscv_sf_vc_v_xvv_se:
10082 case Intrinsic::riscv_sf_vc_v_ivv_se:
10084 case Intrinsic::riscv_sf_vc_v_vvv_se:
10086 case Intrinsic::riscv_sf_vc_v_fvv_se:
10088 case Intrinsic::riscv_sf_vc_v_xvw_se:
10090 case Intrinsic::riscv_sf_vc_v_ivw_se:
10092 case Intrinsic::riscv_sf_vc_v_vvw_se:
10094 case Intrinsic::riscv_sf_vc_v_fvw_se:
10096 }
10097
10098 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10099}
10100
10101SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10102 SelectionDAG &DAG) const {
10103 unsigned IntNo = Op.getConstantOperandVal(1);
10104 switch (IntNo) {
10105 default:
10106 break;
10107 case Intrinsic::riscv_seg2_store:
10108 case Intrinsic::riscv_seg3_store:
10109 case Intrinsic::riscv_seg4_store:
10110 case Intrinsic::riscv_seg5_store:
10111 case Intrinsic::riscv_seg6_store:
10112 case Intrinsic::riscv_seg7_store:
10113 case Intrinsic::riscv_seg8_store: {
10114 SDLoc DL(Op);
10115 static const Intrinsic::ID VssegInts[] = {
10116 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
10117 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
10118 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
10119 Intrinsic::riscv_vsseg8};
10120 // Operands are (chain, int_id, vec*, ptr, vl)
10121 unsigned NF = Op->getNumOperands() - 4;
10122 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10123 MVT XLenVT = Subtarget.getXLenVT();
10124 MVT VT = Op->getOperand(2).getSimpleValueType();
10125 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10126 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10127 ContainerVT.getScalarSizeInBits();
10128 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10129
10130 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10131 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
10132 SDValue Ptr = Op->getOperand(NF + 2);
10133
10134 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
10135
10136 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
10137 for (unsigned i = 0; i < NF; i++)
10138 StoredVal = DAG.getNode(
10139 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
10141 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
10142 DAG.getVectorIdxConstant(i, DL));
10143
10144 SDValue Ops[] = {
10145 FixedIntrinsic->getChain(),
10146 IntID,
10147 StoredVal,
10148 Ptr,
10149 VL,
10150 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10151
10152 return DAG.getMemIntrinsicNode(
10153 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
10154 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
10155 }
10156 case Intrinsic::riscv_sf_vc_xv_se:
10158 case Intrinsic::riscv_sf_vc_iv_se:
10160 case Intrinsic::riscv_sf_vc_vv_se:
10162 case Intrinsic::riscv_sf_vc_fv_se:
10164 case Intrinsic::riscv_sf_vc_xvv_se:
10166 case Intrinsic::riscv_sf_vc_ivv_se:
10168 case Intrinsic::riscv_sf_vc_vvv_se:
10170 case Intrinsic::riscv_sf_vc_fvv_se:
10172 case Intrinsic::riscv_sf_vc_xvw_se:
10174 case Intrinsic::riscv_sf_vc_ivw_se:
10176 case Intrinsic::riscv_sf_vc_vvw_se:
10178 case Intrinsic::riscv_sf_vc_fvw_se:
10180 }
10181
10182 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10183}
10184
10185static unsigned getRVVReductionOp(unsigned ISDOpcode) {
10186 switch (ISDOpcode) {
10187 default:
10188 llvm_unreachable("Unhandled reduction");
10189 case ISD::VP_REDUCE_ADD:
10190 case ISD::VECREDUCE_ADD:
10191 return RISCVISD::VECREDUCE_ADD_VL;
10192 case ISD::VP_REDUCE_UMAX:
10193 case ISD::VECREDUCE_UMAX:
10194 return RISCVISD::VECREDUCE_UMAX_VL;
10195 case ISD::VP_REDUCE_SMAX:
10196 case ISD::VECREDUCE_SMAX:
10197 return RISCVISD::VECREDUCE_SMAX_VL;
10198 case ISD::VP_REDUCE_UMIN:
10199 case ISD::VECREDUCE_UMIN:
10200 return RISCVISD::VECREDUCE_UMIN_VL;
10201 case ISD::VP_REDUCE_SMIN:
10202 case ISD::VECREDUCE_SMIN:
10203 return RISCVISD::VECREDUCE_SMIN_VL;
10204 case ISD::VP_REDUCE_AND:
10205 case ISD::VECREDUCE_AND:
10206 return RISCVISD::VECREDUCE_AND_VL;
10207 case ISD::VP_REDUCE_OR:
10208 case ISD::VECREDUCE_OR:
10209 return RISCVISD::VECREDUCE_OR_VL;
10210 case ISD::VP_REDUCE_XOR:
10211 case ISD::VECREDUCE_XOR:
10212 return RISCVISD::VECREDUCE_XOR_VL;
10213 case ISD::VP_REDUCE_FADD:
10214 return RISCVISD::VECREDUCE_FADD_VL;
10215 case ISD::VP_REDUCE_SEQ_FADD:
10216 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
10217 case ISD::VP_REDUCE_FMAX:
10218 case ISD::VP_REDUCE_FMAXIMUM:
10219 return RISCVISD::VECREDUCE_FMAX_VL;
10220 case ISD::VP_REDUCE_FMIN:
10221 case ISD::VP_REDUCE_FMINIMUM:
10222 return RISCVISD::VECREDUCE_FMIN_VL;
10223 }
10224
10225}
10226
10227SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
10228 SelectionDAG &DAG,
10229 bool IsVP) const {
10230 SDLoc DL(Op);
10231 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
10232 MVT VecVT = Vec.getSimpleValueType();
10233 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
10234 Op.getOpcode() == ISD::VECREDUCE_OR ||
10235 Op.getOpcode() == ISD::VECREDUCE_XOR ||
10236 Op.getOpcode() == ISD::VP_REDUCE_AND ||
10237 Op.getOpcode() == ISD::VP_REDUCE_OR ||
10238 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
10239 "Unexpected reduction lowering");
10240
10241 MVT XLenVT = Subtarget.getXLenVT();
10242
10243 MVT ContainerVT = VecVT;
10244 if (VecVT.isFixedLengthVector()) {
10245 ContainerVT = getContainerForFixedLengthVector(VecVT);
10246 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10247 }
10248
10249 SDValue Mask, VL;
10250 if (IsVP) {
10251 Mask = Op.getOperand(2);
10252 VL = Op.getOperand(3);
10253 } else {
10254 std::tie(Mask, VL) =
10255 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10256 }
10257
10258 ISD::CondCode CC;
10259 switch (Op.getOpcode()) {
10260 default:
10261 llvm_unreachable("Unhandled reduction");
10262 case ISD::VECREDUCE_AND:
10263 case ISD::VP_REDUCE_AND: {
10264 // vcpop ~x == 0
10265 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10266 if (IsVP || VecVT.isFixedLengthVector())
10267 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
10268 else
10269 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
10270 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10271 CC = ISD::SETEQ;
10272 break;
10273 }
10274 case ISD::VECREDUCE_OR:
10275 case ISD::VP_REDUCE_OR:
10276 // vcpop x != 0
10277 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10278 CC = ISD::SETNE;
10279 break;
10280 case ISD::VECREDUCE_XOR:
10281 case ISD::VP_REDUCE_XOR: {
10282 // ((vcpop x) & 1) != 0
10283 SDValue One = DAG.getConstant(1, DL, XLenVT);
10284 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10285 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
10286 CC = ISD::SETNE;
10287 break;
10288 }
10289 }
10290
10291 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10292 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
10293 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
10294
10295 if (!IsVP)
10296 return SetCC;
10297
10298 // Now include the start value in the operation.
10299 // Note that we must return the start value when no elements are operated
10300 // upon. The vcpop instructions we've emitted in each case above will return
10301 // 0 for an inactive vector, and so we've already received the neutral value:
10302 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
10303 // can simply include the start value.
10304 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10305 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
10306}
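// Illustrative scalarizations of the mask reductions above (sketch, actual
// scheduling may differ):
//   vecreduce.or  m  ->  vcpop.m a0, vm ; snez a0, a0
//   vecreduce.and m  ->  vmnot.m vt, vm ; vcpop.m a0, vt ; seqz a0, a0
//   vecreduce.xor m  ->  vcpop.m a0, vm ; andi a0, a0, 1
// with VL set to the fixed-length vector's element count where applicable.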
10307
10308static bool isNonZeroAVL(SDValue AVL) {
10309 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
10310 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
10311 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
10312 (ImmAVL && ImmAVL->getZExtValue() >= 1);
10313}
10314
10315/// Helper to lower a reduction sequence of the form:
10316/// scalar = reduce_op vec, scalar_start
10317static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
10318 SDValue StartValue, SDValue Vec, SDValue Mask,
10319 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
10320 const RISCVSubtarget &Subtarget) {
10321 const MVT VecVT = Vec.getSimpleValueType();
10322 const MVT M1VT = getLMUL1VT(VecVT);
10323 const MVT XLenVT = Subtarget.getXLenVT();
10324 const bool NonZeroAVL = isNonZeroAVL(VL);
10325
10326 // The reduction needs an LMUL1 input; do the splat at either LMUL1
10327 // or the original VT if fractional.
10328 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
10329 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
10330 // prove it is non-zero. For the AVL=0 case, we need the scalar to
10331 // be the result of the reduction operation.
10332 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
10333 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
10334 DAG, Subtarget);
10335 if (M1VT != InnerVT)
10336 InitialValue =
10337 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
10338 InitialValue, DAG.getVectorIdxConstant(0, DL));
10339 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
10340 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10341 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
10342 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
10343 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
10344 DAG.getVectorIdxConstant(0, DL));
10345}
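// Illustrative expansion of the helper above (sketch, assuming a non-zero
// AVL): a vecreduce.add of nxv2i32 with the scalar start value in a0 becomes
// roughly
//   vmv.s.x    v9, a0        ; start value into element 0 of an LMUL1 reg
//   vredsum.vs v9, v8, v9    ; v8 = input vector, tail agnostic
//   vmv.x.s    a0, v9        ; extract element 0 as the scalar result
// FP reductions use vfmv.s.f / vfred*.vs / vfmv.f.s in the same shape.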
10346
10347SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
10348 SelectionDAG &DAG) const {
10349 SDLoc DL(Op);
10350 SDValue Vec = Op.getOperand(0);
10351 EVT VecEVT = Vec.getValueType();
10352
10353 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10354
10355 // Due to ordering in legalize types we may have a vector type that needs to
10356 // be split. Do that manually so we can get down to a legal type.
10357 while (getTypeAction(*DAG.getContext(), VecEVT) ==
10359 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10360 VecEVT = Lo.getValueType();
10361 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
10362 }
10363
10364 // TODO: The type may need to be widened rather than split. Or widened before
10365 // it can be split.
10366 if (!isTypeLegal(VecEVT))
10367 return SDValue();
10368
10369 MVT VecVT = VecEVT.getSimpleVT();
10370 MVT VecEltVT = VecVT.getVectorElementType();
10371 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
10372
10373 MVT ContainerVT = VecVT;
10374 if (VecVT.isFixedLengthVector()) {
10375 ContainerVT = getContainerForFixedLengthVector(VecVT);
10376 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10377 }
10378
10379 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10380
10381 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
10382 switch (BaseOpc) {
10383 case ISD::AND:
10384 case ISD::OR:
10385 case ISD::UMAX:
10386 case ISD::UMIN:
10387 case ISD::SMAX:
10388 case ISD::SMIN:
10389 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
10390 DAG.getVectorIdxConstant(0, DL));
10391 }
10392 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
10393 Mask, VL, DL, DAG, Subtarget);
10394}
10395
10396// Given a reduction op, this function returns the matching reduction opcode,
10397// the vector SDValue and the scalar SDValue required to lower this to a
10398// RISCVISD node.
10399static std::tuple<unsigned, SDValue, SDValue>
10400 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
10401 const RISCVSubtarget &Subtarget) {
10402 SDLoc DL(Op);
10403 auto Flags = Op->getFlags();
10404 unsigned Opcode = Op.getOpcode();
10405 switch (Opcode) {
10406 default:
10407 llvm_unreachable("Unhandled reduction");
10408 case ISD::VECREDUCE_FADD: {
10409 // Use positive zero if we can. It is cheaper to materialize.
10410 SDValue Zero =
10411 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
10412 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
10413 }
10414 case ISD::VECREDUCE_SEQ_FADD:
10415 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
10416 Op.getOperand(0));
10417 case ISD::VECREDUCE_FMINIMUM:
10418 case ISD::VECREDUCE_FMAXIMUM:
10419 case ISD::VECREDUCE_FMIN:
10420 case ISD::VECREDUCE_FMAX: {
10421 SDValue Front =
10422 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
10423 DAG.getVectorIdxConstant(0, DL));
10424 unsigned RVVOpc =
10425 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
10426 ? RISCVISD::VECREDUCE_FMIN_VL
10427 : RISCVISD::VECREDUCE_FMAX_VL;
10428 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
10429 }
10430 }
10431}
10432
10433SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
10434 SelectionDAG &DAG) const {
10435 SDLoc DL(Op);
10436 MVT VecEltVT = Op.getSimpleValueType();
10437
10438 unsigned RVVOpcode;
10439 SDValue VectorVal, ScalarVal;
10440 std::tie(RVVOpcode, VectorVal, ScalarVal) =
10441 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
10442 MVT VecVT = VectorVal.getSimpleValueType();
10443
10444 MVT ContainerVT = VecVT;
10445 if (VecVT.isFixedLengthVector()) {
10446 ContainerVT = getContainerForFixedLengthVector(VecVT);
10447 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
10448 }
10449
10450 MVT ResVT = Op.getSimpleValueType();
10451 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10452 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
10453 VL, DL, DAG, Subtarget);
10454 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
10455 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
10456 return Res;
10457
10458 if (Op->getFlags().hasNoNaNs())
10459 return Res;
10460
10461 // Force the output to NaN if any element is NaN.
10462 SDValue IsNan =
10463 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
10464 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
10465 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
10466 MVT XLenVT = Subtarget.getXLenVT();
10467 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
10468 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
10469 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10470 return DAG.getSelect(
10471 DL, ResVT, NoNaNs, Res,
10472 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10473}
10474
10475SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
10476 SelectionDAG &DAG) const {
10477 SDLoc DL(Op);
10478 unsigned Opc = Op.getOpcode();
10479 SDValue Start = Op.getOperand(0);
10480 SDValue Vec = Op.getOperand(1);
10481 EVT VecEVT = Vec.getValueType();
10482 MVT XLenVT = Subtarget.getXLenVT();
10483
10484 // TODO: The type may need to be widened rather than split. Or widened before
10485 // it can be split.
10486 if (!isTypeLegal(VecEVT))
10487 return SDValue();
10488
10489 MVT VecVT = VecEVT.getSimpleVT();
10490 unsigned RVVOpcode = getRVVReductionOp(Opc);
10491
10492 if (VecVT.isFixedLengthVector()) {
10493 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
10494 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10495 }
10496
10497 SDValue VL = Op.getOperand(3);
10498 SDValue Mask = Op.getOperand(2);
10499 SDValue Res =
10500 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
10501 Vec, Mask, VL, DL, DAG, Subtarget);
10502 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10503 Op->getFlags().hasNoNaNs())
10504 return Res;
10505
10506 // Propagate NaNs.
10507 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
10508 // Check if any of the elements in Vec is NaN.
10509 SDValue IsNaN = DAG.getNode(
10510 RISCVISD::SETCC_VL, DL, PredVT,
10511 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10512 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
10513 // Check if the start value is NaN.
10514 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
10515 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10516 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10517 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10518 MVT ResVT = Res.getSimpleValueType();
10519 return DAG.getSelect(
10520 DL, ResVT, NoNaNs, Res,
10521 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10522}
10523
10524SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10525 SelectionDAG &DAG) const {
10526 SDValue Vec = Op.getOperand(0);
10527 SDValue SubVec = Op.getOperand(1);
10528 MVT VecVT = Vec.getSimpleValueType();
10529 MVT SubVecVT = SubVec.getSimpleValueType();
10530
10531 SDLoc DL(Op);
10532 MVT XLenVT = Subtarget.getXLenVT();
10533 unsigned OrigIdx = Op.getConstantOperandVal(2);
10534 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10535
10536 if (OrigIdx == 0 && Vec.isUndef())
10537 return Op;
10538
10539 // We don't have the ability to slide mask vectors up indexed by their i1
10540 // elements; the smallest we can do is i8. Often we are able to bitcast to
10541 // equivalent i8 vectors. Note that when inserting a fixed-length vector
10542 // into a scalable one, we might not necessarily have enough scalable
10543 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
10544 if (SubVecVT.getVectorElementType() == MVT::i1) {
10545 if (VecVT.getVectorMinNumElements() >= 8 &&
10546 SubVecVT.getVectorMinNumElements() >= 8) {
10547 assert(OrigIdx % 8 == 0 && "Invalid index");
10548 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10549 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10550 "Unexpected mask vector lowering");
10551 OrigIdx /= 8;
10552 SubVecVT =
10553 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10554 SubVecVT.isScalableVector());
10555 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10556 VecVT.isScalableVector());
10557 Vec = DAG.getBitcast(VecVT, Vec);
10558 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10559 } else {
10560 // We can't slide this mask vector up indexed by its i1 elements.
10561 // This poses a problem when we wish to insert a scalable vector which
10562 // can't be re-expressed as a larger type. Just choose the slow path and
10563 // extend to a larger type, then truncate back down.
10564 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10565 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10566 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10567 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10568 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10569 Op.getOperand(2));
10570 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10571 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10572 }
10573 }
10574
10575 // If the subvector is a fixed-length type and we don't know VLEN
10576 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10577 // don't know which register of a LMUL group contains the specific subvector
10578 // as we only know the minimum register size. Therefore we must slide the
10579 // vector group up the full amount.
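// Illustrative example (assumed types, not from a test): inserting v2i32 into
// v8i32 at index 2 without a known exact VLEN slides the subvector up by 2
// with VL = 4 and a tail-undisturbed policy, roughly
//   vsetivli zero, 4, e32, m1, tu, mu
//   vslideup.vi vDst, vSub, 2
// whereas an insert at index 0 instead becomes a tail-undisturbed vmv.v.v
// with VL = 2.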
10580 const auto VLen = Subtarget.getRealVLen();
10581 if (SubVecVT.isFixedLengthVector() && !VLen) {
10582 MVT ContainerVT = VecVT;
10583 if (VecVT.isFixedLengthVector()) {
10584 ContainerVT = getContainerForFixedLengthVector(VecVT);
10585 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10586 }
10587
10588 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10589 DAG.getUNDEF(ContainerVT), SubVec,
10590 DAG.getVectorIdxConstant(0, DL));
10591
10592 SDValue Mask =
10593 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10594 // Set the vector length to only the number of elements we care about. Note
10595 // that for slideup this includes the offset.
10596 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10597 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
10598
10599 // Use tail agnostic policy if we're inserting over Vec's tail.
10600 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10601 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10602 Policy = RISCVII::TAIL_AGNOSTIC;
10603
10604 // If we're inserting into the lowest elements, use a tail undisturbed
10605 // vmv.v.v.
10606 if (OrigIdx == 0) {
10607 SubVec =
10608 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10609 } else {
10610 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10611 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10612 SlideupAmt, Mask, VL, Policy);
10613 }
10614
10615 if (VecVT.isFixedLengthVector())
10616 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10617 return DAG.getBitcast(Op.getValueType(), SubVec);
10618 }
10619
10620 MVT ContainerVecVT = VecVT;
10621 if (VecVT.isFixedLengthVector()) {
10622 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10623 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10624 }
10625
10626 MVT ContainerSubVecVT = SubVecVT;
10627 if (SubVecVT.isFixedLengthVector()) {
10628 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10629 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10630 }
10631
10632 unsigned SubRegIdx;
10633 ElementCount RemIdx;
10634 // insert_subvector scales the index by vscale if the subvector is scalable,
10635 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10636 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10637 if (SubVecVT.isFixedLengthVector()) {
10638 assert(VLen);
10639 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10640 auto Decompose =
10641 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10642 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10643 SubRegIdx = Decompose.first;
10644 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10645 (OrigIdx % Vscale));
10646 } else {
10647 auto Decompose =
10648 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10649 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10650 SubRegIdx = Decompose.first;
10651 RemIdx = ElementCount::getScalable(Decompose.second);
10652 }
10653
10654 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10655 assert(isPowerOf2_64(
10656 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10657 bool ExactlyVecRegSized =
10658 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10659 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10660
10661 // 1. If the Idx has been completely eliminated and this subvector's size is
10662 // a vector register or a multiple thereof, or the surrounding elements are
10663 // undef, then this is a subvector insert which naturally aligns to a vector
10664 // register. These can easily be handled using subregister manipulation.
10665 // 2. If the subvector isn't an exact multiple of a valid register group size,
10666 // then the insertion must preserve the undisturbed elements of the register.
10667 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10668 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10669 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10670 // of that LMUL=1 type back into the larger vector (resolving to another
10671 // subregister operation). See below for how our VSLIDEUP works. We go via a
10672 // LMUL=1 type to avoid allocating a large register group to hold our
10673 // subvector.
10674 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10675 if (SubVecVT.isFixedLengthVector()) {
10676 // We may get NoSubRegister if inserting at index 0 and the subvec
10677 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10678 if (SubRegIdx == RISCV::NoSubRegister) {
10679 assert(OrigIdx == 0);
10680 return Op;
10681 }
10682
10683 // Use an insert_subvector that will resolve to an insert subreg.
10684 assert(VLen);
10685 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10686 SDValue Insert =
10687 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10688 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10689 if (VecVT.isFixedLengthVector())
10690 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10691 return Insert;
10692 }
10693 return Op;
10694 }
10695
10696 // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
10697 // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
10698 // (in our case undisturbed). This means we can set up a subvector insertion
10699 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10700 // size of the subvector.
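// Illustrative sequence for the non-zero-offset case below (sketch): with a
// remainder index of vscale x 1 and an nxv1i32 subvector, the LMUL=1 slice is
// updated roughly as
//   vsetvli zero, aVL, e32, m1, tu, mu   ; aVL = offset + subvector length
//   vslideup.vx vM1, vSub, aOff          ; aOff = vscale * 1
// and then re-inserted into the full register group.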
10701 MVT InterSubVT = ContainerVecVT;
10702 SDValue AlignedExtract = Vec;
10703 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10704 if (SubVecVT.isFixedLengthVector()) {
10705 assert(VLen);
10706 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10707 }
10708 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10709 InterSubVT = getLMUL1VT(ContainerVecVT);
10710 // Extract a subvector equal to the nearest full vector register type. This
10711 // should resolve to an EXTRACT_SUBREG instruction.
10712 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10713 DAG.getVectorIdxConstant(AlignedIdx, DL));
10714 }
10715
10716 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10717 DAG.getUNDEF(InterSubVT), SubVec,
10718 DAG.getVectorIdxConstant(0, DL));
10719
10720 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10721
10722 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10723 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10724
10725 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10726 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10727 if (Subtarget.expandVScale(EndIndex) ==
10728 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10729 Policy = RISCVII::TAIL_AGNOSTIC;
10730
10731 // If we're inserting into the lowest elements, use a tail undisturbed
10732 // vmv.v.v.
10733 if (RemIdx.isZero()) {
10734 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10735 SubVec, VL);
10736 } else {
10737 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10738
10739 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10740 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10741
10742 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10743 SlideupAmt, Mask, VL, Policy);
10744 }
10745
10746 // If required, insert this subvector back into the correct vector register.
10747 // This should resolve to an INSERT_SUBREG instruction.
10748 if (ContainerVecVT.bitsGT(InterSubVT))
10749 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10750 DAG.getVectorIdxConstant(AlignedIdx, DL));
10751
10752 if (VecVT.isFixedLengthVector())
10753 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10754
10755 // We might have bitcast from a mask type: cast back to the original type if
10756 // required.
10757 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10758}
10759
10760SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10761 SelectionDAG &DAG) const {
10762 SDValue Vec = Op.getOperand(0);
10763 MVT SubVecVT = Op.getSimpleValueType();
10764 MVT VecVT = Vec.getSimpleValueType();
10765
10766 SDLoc DL(Op);
10767 MVT XLenVT = Subtarget.getXLenVT();
10768 unsigned OrigIdx = Op.getConstantOperandVal(1);
10769 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10770
10771 // With an index of 0 this is a cast-like subvector, which can be performed
10772 // with subregister operations.
10773 if (OrigIdx == 0)
10774 return Op;
10775
10776 // We don't have the ability to slide mask vectors down indexed by their i1
10777 // elements; the smallest we can do is i8. Often we are able to bitcast to
10778 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10779 // from a scalable one, we might not necessarily have enough scalable
10780 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10781 if (SubVecVT.getVectorElementType() == MVT::i1) {
10782 if (VecVT.getVectorMinNumElements() >= 8 &&
10783 SubVecVT.getVectorMinNumElements() >= 8) {
10784 assert(OrigIdx % 8 == 0 && "Invalid index");
10785 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10786 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10787 "Unexpected mask vector lowering");
10788 OrigIdx /= 8;
10789 SubVecVT =
10790 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10791 SubVecVT.isScalableVector());
10792 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10793 VecVT.isScalableVector());
10794 Vec = DAG.getBitcast(VecVT, Vec);
10795 } else {
10796 // We can't slide this mask vector down, indexed by its i1 elements.
10797 // This poses a problem when we wish to extract a scalable vector which
10798 // can't be re-expressed as a larger type. Just choose the slow path and
10799 // extend to a larger type, then truncate back down.
10800 // TODO: We could probably improve this when extracting certain fixed-length
10801 // vectors from fixed-length vectors, where we can extract as i8 and shift the
10802 // correct element right to reach the desired subvector.
10803 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10804 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10805 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10806 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10807 Op.getOperand(1));
10808 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10809 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10810 }
10811 }
10812
10813 const auto VLen = Subtarget.getRealVLen();
10814
10815 // If the subvector is a fixed-length type and we don't know VLEN
10816 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10817 // don't know which register of a LMUL group contains the specific subvector
10818 // as we only know the minimum register size. Therefore we must slide the
10819 // vector group down the full amount.
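// Illustrative example (assumed types, not from a test): extracting v2i64
// from v8i64 at index 4 without a known exact VLEN becomes roughly
//   vsetivli zero, 2, e64, <lmul>, ta, ma
//   vslidedown.vi vTmp, vSrc, 4
// followed by a cast-like extract of the low elements; the LMUL may first be
// shrunk via getSmallestVTForIndex so the slide touches fewer registers.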
10820 if (SubVecVT.isFixedLengthVector() && !VLen) {
10821 MVT ContainerVT = VecVT;
10822 if (VecVT.isFixedLengthVector()) {
10823 ContainerVT = getContainerForFixedLengthVector(VecVT);
10824 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10825 }
10826
10827 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10828 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10829 if (auto ShrunkVT =
10830 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10831 ContainerVT = *ShrunkVT;
10832 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10833 DAG.getVectorIdxConstant(0, DL));
10834 }
10835
10836 SDValue Mask =
10837 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10838 // Set the vector length to only the number of elements we care about. This
10839 // avoids sliding down elements we're going to discard straight away.
10840 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10841 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10842 SDValue Slidedown =
10843 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10844 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10845 // Now we can use a cast-like subvector extract to get the result.
10846 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10847 DAG.getVectorIdxConstant(0, DL));
10848 return DAG.getBitcast(Op.getValueType(), Slidedown);
10849 }
10850
10851 if (VecVT.isFixedLengthVector()) {
10852 VecVT = getContainerForFixedLengthVector(VecVT);
10853 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10854 }
10855
10856 MVT ContainerSubVecVT = SubVecVT;
10857 if (SubVecVT.isFixedLengthVector())
10858 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10859
10860 unsigned SubRegIdx;
10861 ElementCount RemIdx;
10862 // extract_subvector scales the index by vscale if the subvector is scalable,
10863 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10864 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10865 if (SubVecVT.isFixedLengthVector()) {
10866 assert(VLen);
10867 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10868 auto Decompose =
10869 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10870 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10871 SubRegIdx = Decompose.first;
10872 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10873 (OrigIdx % Vscale));
10874 } else {
10875 auto Decompose =
10876 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10877 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10878 SubRegIdx = Decompose.first;
10879 RemIdx = ElementCount::getScalable(Decompose.second);
10880 }
10881
10882 // If the Idx has been completely eliminated then this is a subvector extract
10883 // which naturally aligns to a vector register. These can easily be handled
10884 // using subregister manipulation. We use an extract_subvector that will
10885 // resolve to an extract subreg.
10886 if (RemIdx.isZero()) {
10887 if (SubVecVT.isFixedLengthVector()) {
10888 assert(VLen);
10889 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10890 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
10891 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10892 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10893 }
10894 return Op;
10895 }
10896
10897 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10898 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10899 // divide exactly.
10900 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10901 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10902
10903 // If the vector type is an LMUL-group type, extract a subvector equal to the
10904 // nearest full vector register type.
10905 MVT InterSubVT = VecVT;
10906 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10907 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10908 // we should have successfully decomposed the extract into a subregister.
10909 // We use an extract_subvector that will resolve to a subreg extract.
10910 assert(SubRegIdx != RISCV::NoSubRegister);
10911 (void)SubRegIdx;
10912 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
10913 if (SubVecVT.isFixedLengthVector()) {
10914 assert(VLen);
10915 Idx /= *VLen / RISCV::RVVBitsPerBlock;
10916 }
10917 InterSubVT = getLMUL1VT(VecVT);
10918 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10919 DAG.getConstant(Idx, DL, XLenVT));
10920 }
10921
10922 // Slide this vector register down by the desired number of elements in order
10923 // to place the desired subvector starting at element 0.
10924 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10925 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10926 if (SubVecVT.isFixedLengthVector())
10927 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10928 SDValue Slidedown =
10929 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10930 Vec, SlidedownAmt, Mask, VL);
10931
10932 // Now the vector is in the right position, extract our final subvector. This
10933 // should resolve to a COPY.
10934 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10935 DAG.getVectorIdxConstant(0, DL));
10936
10937 // We might have bitcast from a mask type: cast back to the original type if
10938 // required.
10939 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10940}
10941
10942// Widen a vector's operands to i8, then truncate its results back to the
10943// original type, typically i1. All operand and result types must be the same.
10944 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10945 SelectionDAG &DAG) {
10946 MVT VT = N.getSimpleValueType();
10947 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10948 SmallVector<SDValue, 4> WideOps;
10949 for (SDValue Op : N->ops()) {
10950 assert(Op.getSimpleValueType() == VT &&
10951 "Operands and result must be same type");
10952 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10953 }
10954
10955 unsigned NumVals = N->getNumValues();
10956
10957 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10958 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10959 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10960 SmallVector<SDValue, 4> TruncVals;
10961 for (unsigned I = 0; I < NumVals; I++) {
10962 TruncVals.push_back(
10963 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10964 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10965 }
10966
10967 if (TruncVals.size() > 1)
10968 return DAG.getMergeValues(TruncVals, DL);
10969 return TruncVals.front();
10970}
10971
10972SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10973 SelectionDAG &DAG) const {
10974 SDLoc DL(Op);
10975 MVT VecVT = Op.getSimpleValueType();
10976
10977 assert(VecVT.isScalableVector() &&
10978 "vector_interleave on non-scalable vector!");
10979
10980 // 1-bit (i1) element vectors need to be widened to i8
10981 if (VecVT.getVectorElementType() == MVT::i1)
10982 return widenVectorOpsToi8(Op, DL, DAG);
10983
10984 // If the VT is LMUL=8, we need to split and reassemble.
10985 if (VecVT.getSizeInBits().getKnownMinValue() ==
10986 (8 * RISCV::RVVBitsPerBlock)) {
10987 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10988 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10989 EVT SplitVT = Op0Lo.getValueType();
10990
10991 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10992 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10993 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10994 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10995
10996 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10997 ResLo.getValue(0), ResHi.getValue(0));
10998 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10999 ResHi.getValue(1));
11000 return DAG.getMergeValues({Even, Odd}, DL);
11001 }
11002
11003 // Concatenate the two vectors as one vector to deinterleave
11004 MVT ConcatVT =
11005 MVT::getVectorVT(VecVT.getVectorElementType(),
11006 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11007 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11008 Op.getOperand(0), Op.getOperand(1));
11009
11010 // We can deinterleave through vnsrl.wi if the element type is smaller than
11011 // ELEN
11012 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11013 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
11014 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
11015 return DAG.getMergeValues({Even, Odd}, DL);
11016 }
11017
11018 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
11019 // possible mask vector, then extract the required subvector. Doing this
11020 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
11021 // creation to be rematerialized during register allocation to reduce
11022 // register pressure if needed.
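// Illustrative detail (sketch): splatting the i8 constant 0b01010101 and
// bitcasting nxv8i8 -> nxv64i1 yields the mask 1,0,1,0,... (bit i of byte j
// becomes mask element 8*j + i), i.e. exactly the even lanes of the
// concatenated vector; 0b10101010 gives the odd lanes. These feed the two
// vcompress operations below.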
11023
11024 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
11025
11026 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
11027 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
11028 SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, EvenSplat,
11029 DAG.getVectorIdxConstant(0, DL));
11030
11031 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
11032 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
11033 SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat,
11034 DAG.getVectorIdxConstant(0, DL));
11035
11036 // vcompress the even and odd elements into two separate vectors
11037 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11038 EvenMask, DAG.getUNDEF(ConcatVT));
11039 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11040 OddMask, DAG.getUNDEF(ConcatVT));
11041
11042 // Extract the result half of the gather for even and odd
11043 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
11044 DAG.getVectorIdxConstant(0, DL));
11045 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
11046 DAG.getVectorIdxConstant(0, DL));
11047
11048 return DAG.getMergeValues({Even, Odd}, DL);
11049}
11050
11051SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
11052 SelectionDAG &DAG) const {
11053 SDLoc DL(Op);
11054 MVT VecVT = Op.getSimpleValueType();
11055
11056 assert(VecVT.isScalableVector() &&
11057 "vector_interleave on non-scalable vector!");
11058
11059 // i1 vectors need to be widened to i8
11060 if (VecVT.getVectorElementType() == MVT::i1)
11061 return widenVectorOpsToi8(Op, DL, DAG);
11062
11063 MVT XLenVT = Subtarget.getXLenVT();
11064 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
11065
11066 // If the VT is LMUL=8, we need to split and reassemble.
11067 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
11068 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11069 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
11070 EVT SplitVT = Op0Lo.getValueType();
11071
11072 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11073 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
11074 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11075 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
11076
11077 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11078 ResLo.getValue(0), ResLo.getValue(1));
11079 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11080 ResHi.getValue(0), ResHi.getValue(1));
11081 return DAG.getMergeValues({Lo, Hi}, DL);
11082 }
11083
11084 SDValue Interleaved;
11085
11086 // If the element type is smaller than ELEN, then we can interleave with
11087 // vwaddu.vv and vwmaccu.vx
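// Illustrative arithmetic for the widening trick (sketch): with x =
// (1 << SEW) - 1, the pair of ops
//   vwaddu.vv  vW, vA, vB    ; vW = zext(a) + zext(b)        (2*SEW bits)
//   vwmaccu.vx vW, x, vB     ; vW += (2^SEW - 1) * zext(b)
// leaves vW = zext(a) + 2^SEW * zext(b), i.e. each 2*SEW element holds a in
// its low half and b in its high half, which is the interleaved layout when
// reinterpreted at the original SEW.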
11088 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11089 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
11090 DAG, Subtarget);
11091 } else {
11092 // Otherwise, fall back to using vrgatherei16.vv
11093 MVT ConcatVT =
11094 MVT::getVectorVT(VecVT.getVectorElementType(),
11095 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11096 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11097 Op.getOperand(0), Op.getOperand(1));
11098
11099 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
11100
11101 // 0 1 2 3 4 5 6 7 ...
11102 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
11103
11104 // 1 1 1 1 1 1 1 1 ...
11105 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
11106
11107 // 1 0 1 0 1 0 1 0 ...
11108 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
11109 OddMask = DAG.getSetCC(
11110 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
11111 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
11112 ISD::SETNE);
11113
11114 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
11115
11116 // Build up the index vector for interleaving the concatenated vector
11117 // 0 0 1 1 2 2 3 3 ...
11118 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
11119 // 0 n 1 n+1 2 n+2 3 n+3 ...
11120 Idx =
11121 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
11122
11123 // Then perform the interleave
11124 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
11125 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
11126 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
11127 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
11128 }
11129
11130 // Extract the two halves from the interleaved result
11131 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11132 DAG.getVectorIdxConstant(0, DL));
11133 SDValue Hi = DAG.getNode(
11134 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11135      DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
11136
11137 return DAG.getMergeValues({Lo, Hi}, DL);
11138}
11139
11140// Lower step_vector to the vid instruction. Any non-identity step value must
11141// be accounted for by manual expansion.
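// For example (a sketch, for an i32 element type): a step of 4 lowers to
// vid.v followed by a left shift by 2, while a non-power-of-two step such as
// 3 lowers to vid.v followed by a multiply with a splat of 3.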
11142SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
11143 SelectionDAG &DAG) const {
11144 SDLoc DL(Op);
11145 MVT VT = Op.getSimpleValueType();
11146 assert(VT.isScalableVector() && "Expected scalable vector");
11147 MVT XLenVT = Subtarget.getXLenVT();
11148 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
11149 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11150 uint64_t StepValImm = Op.getConstantOperandVal(0);
11151 if (StepValImm != 1) {
11152 if (isPowerOf2_64(StepValImm)) {
11153 SDValue StepVal =
11154 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11155 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
11156 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
11157 } else {
11158 SDValue StepVal = lowerScalarSplat(
11159 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
11160 VL, VT, DL, DAG, Subtarget);
11161 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
11162 }
11163 }
11164 return StepVec;
11165}
11166
11167// Implement vector_reverse using vrgather.vv with indices determined by
11168// subtracting the id of each element from (VLMAX-1). This will convert
11169// the indices like so:
11170// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
11171// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
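// For example, with VLMAX = 4 the vid.v sequence (0, 1, 2, 3) is subtracted
// from a splat of VLMAX-1 = 3 to give the gather indices (3, 2, 1, 0).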
11172SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
11173 SelectionDAG &DAG) const {
11174 SDLoc DL(Op);
11175 MVT VecVT = Op.getSimpleValueType();
11176 if (VecVT.getVectorElementType() == MVT::i1) {
11177 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11178 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
11179 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
11180 return DAG.getSetCC(DL, VecVT, Op2,
11181 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
11182 }
11183
11184 MVT ContainerVT = VecVT;
11185 SDValue Vec = Op.getOperand(0);
11186 if (VecVT.isFixedLengthVector()) {
11187 ContainerVT = getContainerForFixedLengthVector(VecVT);
11188 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11189 }
11190
11191 MVT XLenVT = Subtarget.getXLenVT();
11192 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11193
11194  // On some uarchs vrgather.vv will read from every input register for each
11195  // output register, regardless of the indices. However, to reverse a vector,
11196  // each output register only needs to read from one input register, so
11197  // decompose the operation into LMUL * M1 vrgather.vvs to get O(LMUL)
11198  // performance instead of O(LMUL^2).
11199 //
11200 // vsetvli a1, zero, e64, m4, ta, ma
11201 // vrgatherei16.vv v12, v8, v16
11202 // ->
11203 // vsetvli a1, zero, e64, m1, ta, ma
11204 // vrgather.vv v15, v8, v16
11205 // vrgather.vv v14, v9, v16
11206 // vrgather.vv v13, v10, v16
11207 // vrgather.vv v12, v11, v16
11208 if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
11209 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
11210 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11211 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
11212 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
11213 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
11214
11215 // Fixed length vectors might not fit exactly into their container, and so
11216 // leave a gap in the front of the vector after being reversed. Slide this
11217 // away.
11218 //
11219 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
11220 // 0 1 2 3 x x x x <- reverse
11221 // x x x x 0 1 2 3 <- vslidedown.vx
11222 if (VecVT.isFixedLengthVector()) {
11223 SDValue Offset = DAG.getNode(
11224 ISD::SUB, DL, XLenVT,
11225 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
11226 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
11227 Concat =
11228 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11229 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
11230 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
11231 }
11232 return Concat;
11233 }
11234
11235 unsigned EltSize = ContainerVT.getScalarSizeInBits();
11236 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
11237 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11238 unsigned MaxVLMAX =
11239 VecVT.isFixedLengthVector()
11240 ? VecVT.getVectorNumElements()
11241 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11242
11243 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11244 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
11245
11246 // If this is SEW=8 and VLMAX is potentially more than 256, we need
11247 // to use vrgatherei16.vv.
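  // (An i8 index can only address 256 distinct element positions, so a larger
  // VLMAX would overflow the index type.)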
11248 if (MaxVLMAX > 256 && EltSize == 8) {
11249    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
11250 // Reverse each half, then reassemble them in reverse order.
11251 // NOTE: It's also possible that after splitting that VLMAX no longer
11252 // requires vrgatherei16.vv.
11253 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11254 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11255 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
11256 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11257 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11258 // Reassemble the low and high pieces reversed.
11259 // FIXME: This is a CONCAT_VECTORS.
11260 SDValue Res =
11261 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
11262 DAG.getVectorIdxConstant(0, DL));
11263 return DAG.getNode(
11264 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
11265 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
11266 }
11267
11268 // Just promote the int type to i16 which will double the LMUL.
11269 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
11270 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11271 }
11272
11273 // At LMUL > 1, do the index computation in 16 bits to reduce register
11274 // pressure.
11275 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
11276 IntVT.bitsGT(getLMUL1VT(IntVT))) {
11277 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
11278 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11279 IntVT = IntVT.changeVectorElementType(MVT::i16);
11280 }
11281
11282 // Calculate VLMAX-1 for the desired SEW.
11283 SDValue VLMinus1 = DAG.getNode(
11284 ISD::SUB, DL, XLenVT,
11285 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
11286 DAG.getConstant(1, DL, XLenVT));
11287
11288 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
11289 bool IsRV32E64 =
11290 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
11291 SDValue SplatVL;
11292 if (!IsRV32E64)
11293 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
11294 else
11295 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
11296 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
11297
11298 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
11299 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
11300 DAG.getUNDEF(IntVT), Mask, VL);
11301
11302 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
11303 DAG.getUNDEF(ContainerVT), Mask, VL);
11304 if (VecVT.isFixedLengthVector())
11305 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
11306 return Gather;
11307}
11308
11309SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
11310 SelectionDAG &DAG) const {
11311 SDLoc DL(Op);
11312 SDValue V1 = Op.getOperand(0);
11313 SDValue V2 = Op.getOperand(1);
11314 MVT XLenVT = Subtarget.getXLenVT();
11315 MVT VecVT = Op.getSimpleValueType();
11316
11317 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
11318
11319 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
11320 SDValue DownOffset, UpOffset;
11321 if (ImmValue >= 0) {
11322 // The operand is a TargetConstant, we need to rebuild it as a regular
11323 // constant.
11324 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11325 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
11326 } else {
11327 // The operand is a TargetConstant, we need to rebuild it as a regular
11328 // constant rather than negating the original operand.
11329 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11330 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
11331 }
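  // For example (a sketch with VLMAX = 8): vector_splice(V1, V2, 2) uses
  // DownOffset = 2 and UpOffset = 6, so the slidedown leaves V1[2..7] in
  // elements 0..5 and the slideup then places V2[0..1] in elements 6..7.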
11332
11333 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
11334
11335 SDValue SlideDown =
11336 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
11337 DownOffset, TrueMask, UpOffset);
11338 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
11339 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
11340                     RISCVII::TAIL_AGNOSTIC);
11341}
11342
11343SDValue
11344RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
11345 SelectionDAG &DAG) const {
11346 SDLoc DL(Op);
11347 auto *Load = cast<LoadSDNode>(Op);
11348
11349  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11350                                        Load->getMemoryVT(),
11351 *Load->getMemOperand()) &&
11352 "Expecting a correctly-aligned load");
11353
11354 MVT VT = Op.getSimpleValueType();
11355 MVT XLenVT = Subtarget.getXLenVT();
11356 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11357
11358 // If we know the exact VLEN and our fixed length vector completely fills
11359 // the container, use a whole register load instead.
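  // For example (assuming VLEN is known to be exactly 128), a v4i32 load
  // exactly fills its LMUL=1 container and can be selected as a single
  // vl1re32.v instead of a vsetivli + vle32.v sequence.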
11360 const auto [MinVLMAX, MaxVLMAX] =
11361 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11362 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11363 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11364 MachineMemOperand *MMO = Load->getMemOperand();
11365 SDValue NewLoad =
11366 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
11367 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11368 MMO->getAAInfo(), MMO->getRanges());
11369 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11370 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11371 }
11372
11373 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11374
11375 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11376 SDValue IntID = DAG.getTargetConstant(
11377 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
11378 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
11379 if (!IsMaskOp)
11380 Ops.push_back(DAG.getUNDEF(ContainerVT));
11381 Ops.push_back(Load->getBasePtr());
11382 Ops.push_back(VL);
11383 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11384 SDValue NewLoad =
11385      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11386                              Load->getMemoryVT(), Load->getMemOperand());
11387
11388 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11389 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11390}
11391
11392SDValue
11393RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
11394 SelectionDAG &DAG) const {
11395 SDLoc DL(Op);
11396 auto *Store = cast<StoreSDNode>(Op);
11397
11398  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11399                                        Store->getMemoryVT(),
11400 *Store->getMemOperand()) &&
11401 "Expecting a correctly-aligned store");
11402
11403 SDValue StoreVal = Store->getValue();
11404 MVT VT = StoreVal.getSimpleValueType();
11405 MVT XLenVT = Subtarget.getXLenVT();
11406
11407  // If the size is less than a byte, we need to pad with zeros to make a byte.
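  // For example, a v4i1 store is widened to v8i1 with the upper four bits
  // cleared and is then stored as a single byte.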
11408 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11409 VT = MVT::v8i1;
11410 StoreVal =
11411 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
11412 StoreVal, DAG.getVectorIdxConstant(0, DL));
11413 }
11414
11415 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11416
11417 SDValue NewValue =
11418 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11419
11420 // If we know the exact VLEN and our fixed length vector completely fills
11421 // the container, use a whole register store instead.
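  // For example (assuming VLEN is known to be exactly 128), a v16i8 store
  // exactly fills its LMUL=1 container and can be emitted as a single vs1r.v.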
11422 const auto [MinVLMAX, MaxVLMAX] =
11423 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11424 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11425 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11426 MachineMemOperand *MMO = Store->getMemOperand();
11427 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
11428 MMO->getPointerInfo(), MMO->getBaseAlign(),
11429 MMO->getFlags(), MMO->getAAInfo());
11430 }
11431
11432 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11433
11434 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11435 SDValue IntID = DAG.getTargetConstant(
11436 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
11437 return DAG.getMemIntrinsicNode(
11438 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
11439 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11440 Store->getMemoryVT(), Store->getMemOperand());
11441}
11442
11443SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11444 SelectionDAG &DAG) const {
11445 SDLoc DL(Op);
11446 MVT VT = Op.getSimpleValueType();
11447
11448 const auto *MemSD = cast<MemSDNode>(Op);
11449 EVT MemVT = MemSD->getMemoryVT();
11450 MachineMemOperand *MMO = MemSD->getMemOperand();
11451 SDValue Chain = MemSD->getChain();
11452 SDValue BasePtr = MemSD->getBasePtr();
11453
11454 SDValue Mask, PassThru, VL;
11455 bool IsExpandingLoad = false;
11456 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11457 Mask = VPLoad->getMask();
11458 PassThru = DAG.getUNDEF(VT);
11459 VL = VPLoad->getVectorLength();
11460 } else {
11461 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
11462 Mask = MLoad->getMask();
11463 PassThru = MLoad->getPassThru();
11464 IsExpandingLoad = MLoad->isExpandingLoad();
11465 }
11466
11467 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11468
11469 MVT XLenVT = Subtarget.getXLenVT();
11470
11471 MVT ContainerVT = VT;
11472 if (VT.isFixedLengthVector()) {
11473 ContainerVT = getContainerForFixedLengthVector(VT);
11474 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11475 if (!IsUnmasked) {
11476 MVT MaskVT = getMaskTypeFor(ContainerVT);
11477 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11478 }
11479 }
11480
11481 if (!VL)
11482 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11483
11484 SDValue ExpandingVL;
11485 if (!IsUnmasked && IsExpandingLoad) {
11486 ExpandingVL = VL;
11487 VL =
11488 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11489 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11490 }
11491
11492 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11493 : Intrinsic::riscv_vle_mask;
11494 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11495 if (IntID == Intrinsic::riscv_vle)
11496 Ops.push_back(DAG.getUNDEF(ContainerVT));
11497 else
11498 Ops.push_back(PassThru);
11499 Ops.push_back(BasePtr);
11500 if (IntID == Intrinsic::riscv_vle_mask)
11501 Ops.push_back(Mask);
11502 Ops.push_back(VL);
11503 if (IntID == Intrinsic::riscv_vle_mask)
11504 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11505
11506 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11507
11508 SDValue Result =
11509 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11510 Chain = Result.getValue(1);
11511 if (ExpandingVL) {
11512 MVT IndexVT = ContainerVT;
11513 if (ContainerVT.isFloatingPoint())
11514 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11515
11516 MVT IndexEltVT = IndexVT.getVectorElementType();
11517 bool UseVRGATHEREI16 = false;
11518    // If the index vector is an i8 vector and the element count exceeds 256,
11519    // change the element type of the index vector to i16 to avoid
11520    // overflow.
11521 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11522 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11523 assert(getLMUL(IndexVT) != RISCVII::LMUL_8);
11524 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11525 UseVRGATHEREI16 = true;
11526 }
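  // The expanding load is built roughly as follows (a sketch, e.g. for 32-bit
  // elements):
  //   vcpop.m     a1, v0              ; number of enabled lanes
  //   vsetvli     zero, a1, e32, ...  ; load exactly that many elements
  //   vle32.v     v8, (a0)            ; contiguous load of the packed data
  //   viota.m     v12, v0             ; per-lane index into the packed data
  //   vrgather.vv v16, v8, v12, v0.t  ; spread the values back to the
  //                                   ; enabled lanes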
11527
11528 SDValue Iota =
11529 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11530 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11531 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
11532 Result =
11533 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11534                                       : RISCVISD::VRGATHER_VV_VL,
11535                    DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
11536 }
11537
11538 if (VT.isFixedLengthVector())
11539 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11540
11541 return DAG.getMergeValues({Result, Chain}, DL);
11542}
11543
11544SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
11545 SelectionDAG &DAG) const {
11546 SDLoc DL(Op);
11547
11548 const auto *MemSD = cast<MemSDNode>(Op);
11549 EVT MemVT = MemSD->getMemoryVT();
11550 MachineMemOperand *MMO = MemSD->getMemOperand();
11551 SDValue Chain = MemSD->getChain();
11552 SDValue BasePtr = MemSD->getBasePtr();
11553 SDValue Val, Mask, VL;
11554
11555 bool IsCompressingStore = false;
11556 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11557 Val = VPStore->getValue();
11558 Mask = VPStore->getMask();
11559 VL = VPStore->getVectorLength();
11560 } else {
11561 const auto *MStore = cast<MaskedStoreSDNode>(Op);
11562 Val = MStore->getValue();
11563 Mask = MStore->getMask();
11564 IsCompressingStore = MStore->isCompressingStore();
11565 }
11566
11567 bool IsUnmasked =
11568 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11569
11570 MVT VT = Val.getSimpleValueType();
11571 MVT XLenVT = Subtarget.getXLenVT();
11572
11573 MVT ContainerVT = VT;
11574 if (VT.isFixedLengthVector()) {
11575 ContainerVT = getContainerForFixedLengthVector(VT);
11576
11577 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11578 if (!IsUnmasked || IsCompressingStore) {
11579 MVT MaskVT = getMaskTypeFor(ContainerVT);
11580 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11581 }
11582 }
11583
11584 if (!VL)
11585 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11586
11587 if (IsCompressingStore) {
11588 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11589 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11590 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
11591 VL =
11592 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11593 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11594 }
11595
11596 unsigned IntID =
11597 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11598 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11599 Ops.push_back(Val);
11600 Ops.push_back(BasePtr);
11601 if (!IsUnmasked)
11602 Ops.push_back(Mask);
11603 Ops.push_back(VL);
11604
11605  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11606                                 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11607}
11608
11609SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
11610 SelectionDAG &DAG) const {
11611 SDLoc DL(Op);
11612 SDValue Val = Op.getOperand(0);
11613 SDValue Mask = Op.getOperand(1);
11614 SDValue Passthru = Op.getOperand(2);
11615
11616 MVT VT = Val.getSimpleValueType();
11617 MVT XLenVT = Subtarget.getXLenVT();
11618 MVT ContainerVT = VT;
11619 if (VT.isFixedLengthVector()) {
11620 ContainerVT = getContainerForFixedLengthVector(VT);
11621 MVT MaskVT = getMaskTypeFor(ContainerVT);
11622 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11623 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11624 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
11625 }
11626
11627 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11628 SDValue Res =
11629 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11630 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11631 Passthru, Val, Mask, VL);
11632
11633 if (VT.isFixedLengthVector())
11634 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
11635
11636 return Res;
11637}
11638
11639SDValue
11640RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
11641 SelectionDAG &DAG) const {
11642 MVT InVT = Op.getOperand(0).getSimpleValueType();
11643 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
11644
11645 MVT VT = Op.getSimpleValueType();
11646
11647 SDValue Op1 =
11648 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
11649 SDValue Op2 =
11650 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11651
11652 SDLoc DL(Op);
11653 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11654 DAG, Subtarget);
11655 MVT MaskVT = getMaskTypeFor(ContainerVT);
11656
11657 SDValue Cmp =
11658 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11659 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11660
11661 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11662}
11663
11664SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11665 SelectionDAG &DAG) const {
11666 unsigned Opc = Op.getOpcode();
11667 SDLoc DL(Op);
11668 SDValue Chain = Op.getOperand(0);
11669 SDValue Op1 = Op.getOperand(1);
11670 SDValue Op2 = Op.getOperand(2);
11671 SDValue CC = Op.getOperand(3);
11672 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11673 MVT VT = Op.getSimpleValueType();
11674 MVT InVT = Op1.getSimpleValueType();
11675
11676  // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with the OEQ/UNE
11677  // condition codes.
11678 if (Opc == ISD::STRICT_FSETCCS) {
11679 // Expand strict_fsetccs(x, oeq) to
11680 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
11681 SDVTList VTList = Op->getVTList();
11682 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
11683 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
11684 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11685 Op2, OLECCVal);
11686 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
11687 Op1, OLECCVal);
11688 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11689 Tmp1.getValue(1), Tmp2.getValue(1));
11690 // Tmp1 and Tmp2 might be the same node.
11691 if (Tmp1 != Tmp2)
11692 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11693 return DAG.getMergeValues({Tmp1, OutChain}, DL);
11694 }
11695
11696 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11697 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11698 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11699 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11700 Op2, OEQCCVal);
11701 SDValue Res = DAG.getNOT(DL, OEQ, VT);
11702 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11703 }
11704 }
11705
11706 MVT ContainerInVT = InVT;
11707 if (InVT.isFixedLengthVector()) {
11708 ContainerInVT = getContainerForFixedLengthVector(InVT);
11709 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11710 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11711 }
11712 MVT MaskVT = getMaskTypeFor(ContainerInVT);
11713
11714 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11715
11716 SDValue Res;
11717 if (Opc == ISD::STRICT_FSETCC &&
11718 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11719 CCVal == ISD::SETOLE)) {
11720    // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
11721    // is only active when both input elements are ordered.
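    // For example, a masked (strict_fsetcc x, y, olt) is emitted roughly as:
    //   vmfeq.vv v10, v8, v8         ; x[i] is not a NaN
    //   vmfeq.vv v11, v9, v9         ; y[i] is not a NaN
    //   vmand.mm v0, v10, v11        ; lanes where both inputs are ordered
    //   vmflt.vv v12, v8, v9, v0.t   ; compare only those lanes; the mask also
    //                                ; serves as the merge value, so unordered
    //                                ; lanes come out as 0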
11722 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
11723 SDValue OrderMask1 = DAG.getNode(
11724 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11725 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11726 True, VL});
11727 SDValue OrderMask2 = DAG.getNode(
11728 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11729 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11730 True, VL});
11731 Mask =
11732 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11733 // Use Mask as the passthru operand to let the result be 0 if either of the
11734 // inputs is unordered.
11735    Res = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
11736                      DAG.getVTList(MaskVT, MVT::Other),
11737 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11738 } else {
11739 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11740                                                : RISCVISD::STRICT_FSETCCS_VL;
11741    Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11742 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11743 }
11744
11745 if (VT.isFixedLengthVector()) {
11746 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11747 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11748 }
11749 return Res;
11750}
11751
11752// Lower vector ABS to smax(X, sub(0, X)).
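// For example, for x = -7 this computes smax(-7, 0 - (-7)) = smax(-7, 7) = 7;
// INT_MIN wraps back to itself under the subtraction and so remains INT_MIN.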
11753SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
11754 SDLoc DL(Op);
11755 MVT VT = Op.getSimpleValueType();
11756 SDValue X = Op.getOperand(0);
11757
11758 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11759 "Unexpected type for ISD::ABS");
11760
11761 MVT ContainerVT = VT;
11762 if (VT.isFixedLengthVector()) {
11763 ContainerVT = getContainerForFixedLengthVector(VT);
11764 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
11765 }
11766
11767 SDValue Mask, VL;
11768 if (Op->getOpcode() == ISD::VP_ABS) {
11769 Mask = Op->getOperand(1);
11770 if (VT.isFixedLengthVector())
11771 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11772 Subtarget);
11773 VL = Op->getOperand(2);
11774 } else
11775 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11776
11777 SDValue SplatZero = DAG.getNode(
11778 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11779 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11780 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11781 DAG.getUNDEF(ContainerVT), Mask, VL);
11782 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11783 DAG.getUNDEF(ContainerVT), Mask, VL);
11784
11785 if (VT.isFixedLengthVector())
11786 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11787 return Max;
11788}
11789
11790SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11791 SDValue Op, SelectionDAG &DAG) const {
11792 SDLoc DL(Op);
11793 MVT VT = Op.getSimpleValueType();
11794 SDValue Mag = Op.getOperand(0);
11795 SDValue Sign = Op.getOperand(1);
11796 assert(Mag.getValueType() == Sign.getValueType() &&
11797 "Can only handle COPYSIGN with matching types.");
11798
11799 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11800 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11801 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11802
11803 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11804
11805 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11806 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11807
11808 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11809}
11810
11811SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11812 SDValue Op, SelectionDAG &DAG) const {
11813 MVT VT = Op.getSimpleValueType();
11814 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11815
11816 MVT I1ContainerVT =
11817 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11818
11819 SDValue CC =
11820 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11821 SDValue Op1 =
11822 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11823 SDValue Op2 =
11824 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11825
11826 SDLoc DL(Op);
11827 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11828
11829 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11830 Op2, DAG.getUNDEF(ContainerVT), VL);
11831
11832 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11833}
11834
11835SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11836 SelectionDAG &DAG) const {
11837 unsigned NewOpc = getRISCVVLOp(Op);
11838 bool HasPassthruOp = hasPassthruOp(NewOpc);
11839 bool HasMask = hasMaskOp(NewOpc);
11840
11841 MVT VT = Op.getSimpleValueType();
11842 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11843
11844 // Create list of operands by converting existing ones to scalable types.
11845  SmallVector<SDValue, 6> Ops;
11846  for (const SDValue &V : Op->op_values()) {
11847 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11848
11849 // Pass through non-vector operands.
11850 if (!V.getValueType().isVector()) {
11851 Ops.push_back(V);
11852 continue;
11853 }
11854
11855 // "cast" fixed length vector to a scalable vector.
11856 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11857 "Only fixed length vectors are supported!");
11858 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11859 }
11860
11861 SDLoc DL(Op);
11862 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11863 if (HasPassthruOp)
11864 Ops.push_back(DAG.getUNDEF(ContainerVT));
11865 if (HasMask)
11866 Ops.push_back(Mask);
11867 Ops.push_back(VL);
11868
11869  // StrictFP operations have two result values. Their lowered result should
11870  // have the same result count.
11871 if (Op->isStrictFPOpcode()) {
11872 SDValue ScalableRes =
11873 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11874 Op->getFlags());
11875 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11876 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11877 }
11878
11879 SDValue ScalableRes =
11880 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11881 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11882}
11883
11884// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11885// * Operands of each node are assumed to be in the same order.
11886// * The EVL operand is promoted from i32 to i64 on RV64.
11887// * Fixed-length vectors are converted to their scalable-vector container
11888// types.
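// For example (a sketch): a fixed-length
//   vp.add(<4 x i32> %a, <4 x i32> %b, <4 x i1> %m, i32 %evl)
// becomes roughly
//   ADD_VL nxv2i32 %a', %b', undef (passthru), %m', zext(%evl)
// where the primed operands are the originals converted into their scalable
// container type (nxv2i32 here assumes VLEN >= 128), and the result is
// converted back to <4 x i32>.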
11889SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11890 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11891 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
11892
11893 SDLoc DL(Op);
11894 MVT VT = Op.getSimpleValueType();
11895  SmallVector<SDValue, 4> Ops;
11896
11897 MVT ContainerVT = VT;
11898 if (VT.isFixedLengthVector())
11899 ContainerVT = getContainerForFixedLengthVector(VT);
11900
11901 for (const auto &OpIdx : enumerate(Op->ops())) {
11902 SDValue V = OpIdx.value();
11903 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11904    // Add a dummy passthru value before the mask, or, if there isn't a mask,
11905    // before the EVL.
11906 if (HasPassthruOp) {
11907 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11908 if (MaskIdx) {
11909 if (*MaskIdx == OpIdx.index())
11910 Ops.push_back(DAG.getUNDEF(ContainerVT));
11911 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11912 OpIdx.index()) {
11913 if (Op.getOpcode() == ISD::VP_MERGE) {
11914 // For VP_MERGE, copy the false operand instead of an undef value.
11915 Ops.push_back(Ops.back());
11916 } else {
11917 assert(Op.getOpcode() == ISD::VP_SELECT);
11918 // For VP_SELECT, add an undef value.
11919 Ops.push_back(DAG.getUNDEF(ContainerVT));
11920 }
11921 }
11922 }
11923 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
11924 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
11925 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
11926      Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
11927                                          Subtarget.getXLenVT()));
11928 // Pass through operands which aren't fixed-length vectors.
11929 if (!V.getValueType().isFixedLengthVector()) {
11930 Ops.push_back(V);
11931 continue;
11932 }
11933 // "cast" fixed length vector to a scalable vector.
11934 MVT OpVT = V.getSimpleValueType();
11935 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11936 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11937 "Only fixed length vectors are supported!");
11938 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11939 }
11940
11941 if (!VT.isFixedLengthVector())
11942 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11943
11944 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11945
11946 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11947}
11948
11949SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11950 SelectionDAG &DAG) const {
11951 SDLoc DL(Op);
11952 MVT VT = Op.getSimpleValueType();
11953
11954 SDValue Src = Op.getOperand(0);
11955 // NOTE: Mask is dropped.
11956 SDValue VL = Op.getOperand(2);
11957
11958 MVT ContainerVT = VT;
11959 if (VT.isFixedLengthVector()) {
11960 ContainerVT = getContainerForFixedLengthVector(VT);
11961 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11962 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11963 }
11964
11965 MVT XLenVT = Subtarget.getXLenVT();
11966 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11967 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11968 DAG.getUNDEF(ContainerVT), Zero, VL);
11969
11970 SDValue SplatValue = DAG.getSignedConstant(
11971 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11972 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11973 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11974
11975 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11976 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11977 if (!VT.isFixedLengthVector())
11978 return Result;
11979 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11980}
11981
11982SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11983 SelectionDAG &DAG) const {
11984 SDLoc DL(Op);
11985 MVT VT = Op.getSimpleValueType();
11986
11987 SDValue Op1 = Op.getOperand(0);
11988 SDValue Op2 = Op.getOperand(1);
11989 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11990 // NOTE: Mask is dropped.
11991 SDValue VL = Op.getOperand(4);
11992
11993 MVT ContainerVT = VT;
11994 if (VT.isFixedLengthVector()) {
11995 ContainerVT = getContainerForFixedLengthVector(VT);
11996 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11997 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11998 }
11999
12000  SDValue Result;
12001  SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12002
12003 switch (Condition) {
12004 default:
12005 break;
12006 // X != Y --> (X^Y)
12007 case ISD::SETNE:
12008 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
12009 break;
12010 // X == Y --> ~(X^Y)
12011 case ISD::SETEQ: {
12012 SDValue Temp =
12013 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
12014 Result =
12015 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
12016 break;
12017 }
12018 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12019 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12020 case ISD::SETGT:
12021 case ISD::SETULT: {
12022 SDValue Temp =
12023 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12024 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
12025 break;
12026 }
12027 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12028 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12029 case ISD::SETLT:
12030 case ISD::SETUGT: {
12031 SDValue Temp =
12032 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12033 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
12034 break;
12035 }
12036 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12037 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12038 case ISD::SETGE:
12039 case ISD::SETULE: {
12040 SDValue Temp =
12041 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12042 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
12043 break;
12044 }
12045 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12046 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12047 case ISD::SETLE:
12048 case ISD::SETUGE: {
12049 SDValue Temp =
12050 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12051 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
12052 break;
12053 }
12054 }
12055
12056 if (!VT.isFixedLengthVector())
12057 return Result;
12058 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12059}
12060
12061// Lower Floating-Point/Integer Type-Convert VP SDNodes
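// For example, a vp.fptosi from nxv2f64 to nxv2i8 is a narrowing conversion:
// it first converts to the half-width integer type (roughly vfncvt.rtz.x.f.w
// down to i32) and then truncates step by step (vnsrl.wi to i16, then i8).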
12062SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
12063 SelectionDAG &DAG) const {
12064 SDLoc DL(Op);
12065
12066 SDValue Src = Op.getOperand(0);
12067 SDValue Mask = Op.getOperand(1);
12068 SDValue VL = Op.getOperand(2);
12069 unsigned RISCVISDOpc = getRISCVVLOp(Op);
12070
12071 MVT DstVT = Op.getSimpleValueType();
12072 MVT SrcVT = Src.getSimpleValueType();
12073 if (DstVT.isFixedLengthVector()) {
12074 DstVT = getContainerForFixedLengthVector(DstVT);
12075 SrcVT = getContainerForFixedLengthVector(SrcVT);
12076 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
12077 MVT MaskVT = getMaskTypeFor(DstVT);
12078 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12079 }
12080
12081 unsigned DstEltSize = DstVT.getScalarSizeInBits();
12082 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
12083
12084  SDValue Result;
12085  if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
12086 if (SrcVT.isInteger()) {
12087 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12088
12089 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
12090                                    ? RISCVISD::VSEXT_VL
12091                                    : RISCVISD::VZEXT_VL;
12092
12093 // Do we need to do any pre-widening before converting?
12094 if (SrcEltSize == 1) {
12095 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
12096 MVT XLenVT = Subtarget.getXLenVT();
12097 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
12098 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12099 DAG.getUNDEF(IntVT), Zero, VL);
12100 SDValue One = DAG.getSignedConstant(
12101 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
12102 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12103 DAG.getUNDEF(IntVT), One, VL);
12104 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
12105 ZeroSplat, DAG.getUNDEF(IntVT), VL);
12106 } else if (DstEltSize > (2 * SrcEltSize)) {
12107 // Widen before converting.
12108 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
12109 DstVT.getVectorElementCount());
12110 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
12111 }
12112
12113 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12114 } else {
12115 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12116 "Wrong input/output vector types");
12117
12118 // Convert f16 to f32 then convert f32 to i64.
12119 if (DstEltSize > (2 * SrcEltSize)) {
12120 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12121 MVT InterimFVT =
12122 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12123 Src =
12124 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
12125 }
12126
12127 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12128 }
12129 } else { // Narrowing + Conversion
12130 if (SrcVT.isInteger()) {
12131 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12132      // First do a narrowing conversion to an FP type half the size of the
12133      // source, then round to a smaller FP type if needed.
12134
12135 MVT InterimFVT = DstVT;
12136 if (SrcEltSize > (2 * DstEltSize)) {
12137 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
12138 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12139 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12140 }
12141
12142 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
12143
12144 if (InterimFVT != DstVT) {
12145 Src = Result;
12146 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
12147 }
12148 } else {
12149 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12150 "Wrong input/output vector types");
12151 // First do a narrowing conversion to an integer half the size, then
12152 // truncate if needed.
12153
12154 if (DstEltSize == 1) {
12155 // First convert to the same size integer, then convert to mask using
12156 // setcc.
12157 assert(SrcEltSize >= 16 && "Unexpected FP type!");
12158 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
12159 DstVT.getVectorElementCount());
12160 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12161
12162 // Compare the integer result to 0. The integer should be 0 or 1/-1,
12163 // otherwise the conversion was undefined.
12164 MVT XLenVT = Subtarget.getXLenVT();
12165 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
12166 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
12167 DAG.getUNDEF(InterimIVT), SplatZero, VL);
12168 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
12169 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
12170 DAG.getUNDEF(DstVT), Mask, VL});
12171 } else {
12172 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12173 DstVT.getVectorElementCount());
12174
12175 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12176
12177 while (InterimIVT != DstVT) {
12178 SrcEltSize /= 2;
12179 Src = Result;
12180 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12181 DstVT.getVectorElementCount());
12182 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
12183 Src, Mask, VL);
12184 }
12185 }
12186 }
12187 }
12188
12189 MVT VT = Op.getSimpleValueType();
12190 if (!VT.isFixedLengthVector())
12191 return Result;
12192 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12193}
12194
12195SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
12196 SelectionDAG &DAG) const {
12197 SDLoc DL(Op);
12198 MVT VT = Op.getSimpleValueType();
12199 MVT XLenVT = Subtarget.getXLenVT();
12200
12201 SDValue Mask = Op.getOperand(0);
12202 SDValue TrueVal = Op.getOperand(1);
12203 SDValue FalseVal = Op.getOperand(2);
12204 SDValue VL = Op.getOperand(3);
12205
12206 // Use default legalization if a vector of EVL type would be legal.
12207 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
12208                                  VT.getVectorElementCount());
12209  if (isTypeLegal(EVLVecVT))
12210 return SDValue();
12211
12212 MVT ContainerVT = VT;
12213 if (VT.isFixedLengthVector()) {
12214 ContainerVT = getContainerForFixedLengthVector(VT);
12215 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
12216 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
12217 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
12218 }
12219
12220 // Promote to a vector of i8.
12221 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
12222
12223 // Promote TrueVal and FalseVal using VLMax.
12224 // FIXME: Is there a better way to do this?
12225 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
12226 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12227 DAG.getUNDEF(PromotedVT),
12228 DAG.getConstant(1, DL, XLenVT), VLMax);
12229 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12230 DAG.getUNDEF(PromotedVT),
12231 DAG.getConstant(0, DL, XLenVT), VLMax);
12232 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
12233 SplatZero, DAG.getUNDEF(PromotedVT), VL);
12234 // Any element past VL uses FalseVal, so use VLMax
12235 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
12236 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
12237
12238 // VP_MERGE the two promoted values.
12239 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
12240 TrueVal, FalseVal, FalseVal, VL);
12241
12242 // Convert back to mask.
12243 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12244 SDValue Result = DAG.getNode(
12245 RISCVISD::SETCC_VL, DL, ContainerVT,
12246 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
12247 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
12248
12249 if (VT.isFixedLengthVector())
12250 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12251 return Result;
12252}
12253
12254SDValue
12255RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
12256 SelectionDAG &DAG) const {
12257 SDLoc DL(Op);
12258
12259 SDValue Op1 = Op.getOperand(0);
12260 SDValue Op2 = Op.getOperand(1);
12261 SDValue Offset = Op.getOperand(2);
12262 SDValue Mask = Op.getOperand(3);
12263 SDValue EVL1 = Op.getOperand(4);
12264 SDValue EVL2 = Op.getOperand(5);
12265
12266 const MVT XLenVT = Subtarget.getXLenVT();
12267 MVT VT = Op.getSimpleValueType();
12268 MVT ContainerVT = VT;
12269 if (VT.isFixedLengthVector()) {
12270 ContainerVT = getContainerForFixedLengthVector(VT);
12271 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12272 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12273 MVT MaskVT = getMaskTypeFor(ContainerVT);
12274 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12275 }
12276
12277 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
12278 if (IsMaskVector) {
12279 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
12280
12281 // Expand input operands
12282 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12283 DAG.getUNDEF(ContainerVT),
12284 DAG.getConstant(1, DL, XLenVT), EVL1);
12285 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12286 DAG.getUNDEF(ContainerVT),
12287 DAG.getConstant(0, DL, XLenVT), EVL1);
12288 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
12289 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
12290
12291 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12292 DAG.getUNDEF(ContainerVT),
12293 DAG.getConstant(1, DL, XLenVT), EVL2);
12294 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12295 DAG.getUNDEF(ContainerVT),
12296 DAG.getConstant(0, DL, XLenVT), EVL2);
12297 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
12298 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
12299 }
12300
12301 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
12302 SDValue DownOffset, UpOffset;
12303 if (ImmValue >= 0) {
12304 // The operand is a TargetConstant, we need to rebuild it as a regular
12305 // constant.
12306 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12307 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
12308 } else {
12309 // The operand is a TargetConstant, we need to rebuild it as a regular
12310 // constant rather than negating the original operand.
12311 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12312 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
12313 }
12314
12315 SDValue SlideDown =
12316 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12317 Op1, DownOffset, Mask, UpOffset);
12318 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
12319 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
12320
12321 if (IsMaskVector) {
12322 // Truncate Result back to a mask vector (Result has same EVL as Op2)
12323 Result = DAG.getNode(
12324 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
12325 {Result, DAG.getConstant(0, DL, ContainerVT),
12326 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12327 Mask, EVL2});
12328 }
12329
12330 if (!VT.isFixedLengthVector())
12331 return Result;
12332 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12333}
12334
12335SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
12336 SelectionDAG &DAG) const {
12337 SDLoc DL(Op);
12338 SDValue Val = Op.getOperand(0);
12339 SDValue Mask = Op.getOperand(1);
12340 SDValue VL = Op.getOperand(2);
12341 MVT VT = Op.getSimpleValueType();
12342
12343 MVT ContainerVT = VT;
12344 if (VT.isFixedLengthVector()) {
12345 ContainerVT = getContainerForFixedLengthVector(VT);
12346 MVT MaskVT = getMaskTypeFor(ContainerVT);
12347 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12348 }
12349
12350 SDValue Result =
12351 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12352
12353 if (!VT.isFixedLengthVector())
12354 return Result;
12355 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12356}
12357
12358SDValue
12359RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
12360 SelectionDAG &DAG) const {
12361 SDLoc DL(Op);
12362 MVT VT = Op.getSimpleValueType();
12363 MVT XLenVT = Subtarget.getXLenVT();
12364
12365 SDValue Op1 = Op.getOperand(0);
12366 SDValue Mask = Op.getOperand(1);
12367 SDValue EVL = Op.getOperand(2);
12368
12369 MVT ContainerVT = VT;
12370 if (VT.isFixedLengthVector()) {
12371 ContainerVT = getContainerForFixedLengthVector(VT);
12372 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12373 MVT MaskVT = getMaskTypeFor(ContainerVT);
12374 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12375 }
12376
12377 MVT GatherVT = ContainerVT;
12378 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
12379 // Check if we are working with mask vectors
12380 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
12381 if (IsMaskVector) {
12382 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
12383
12384 // Expand input operand
12385 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12386 DAG.getUNDEF(IndicesVT),
12387 DAG.getConstant(1, DL, XLenVT), EVL);
12388 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12389 DAG.getUNDEF(IndicesVT),
12390 DAG.getConstant(0, DL, XLenVT), EVL);
12391 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
12392 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
12393 }
12394
12395 unsigned EltSize = GatherVT.getScalarSizeInBits();
12396 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
12397 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12398 unsigned MaxVLMAX =
12399 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12400
12401 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12402 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
12403 // to use vrgatherei16.vv.
12404 // TODO: It's also possible to use vrgatherei16.vv for other types to
12405 // decrease register width for the index calculation.
12406 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12407 if (MaxVLMAX > 256 && EltSize == 8) {
12408 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
12409 // Split the vector in half and reverse each half using a full register
12410 // reverse.
12411 // Swap the halves and concatenate them.
12412 // Slide the concatenated result by (VLMax - VL).
12413 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12414 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
12415 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
12416
12417 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12418 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12419
12420 // Reassemble the low and high pieces reversed.
12421 // NOTE: this Result is unmasked (because we do not need masks for
12422 // shuffles). If in the future this has to change, we can use a SELECT_VL
12423 // between Result and UNDEF using the mask originally passed to VP_REVERSE
12424 SDValue Result =
12425 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
12426
12427 // Slide off any elements from past EVL that were reversed into the low
12428 // elements.
12429 unsigned MinElts = GatherVT.getVectorMinNumElements();
12430 SDValue VLMax =
12431 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
12432 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
12433
12434 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
12435 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12436
12437 if (IsMaskVector) {
12438 // Truncate Result back to a mask vector
12439 Result =
12440 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
12441 {Result, DAG.getConstant(0, DL, GatherVT),
12442                         DAG.getCondCode(ISD::SETNE),
12443                         DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12444 }
12445
12446 if (!VT.isFixedLengthVector())
12447 return Result;
12448 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12449 }
12450
12451 // Just promote the int type to i16 which will double the LMUL.
12452 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
12453 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12454 }
12455
12456 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
12457 SDValue VecLen =
12458 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
12459 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12460 DAG.getUNDEF(IndicesVT), VecLen, EVL);
12461 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
12462 DAG.getUNDEF(IndicesVT), Mask, EVL);
12463 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
12464 DAG.getUNDEF(GatherVT), Mask, EVL);
12465
12466 if (IsMaskVector) {
12467 // Truncate Result back to a mask vector
12468 Result = DAG.getNode(
12469 RISCVISD::SETCC_VL, DL, ContainerVT,
12470 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
12471 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12472 }
12473
12474 if (!VT.isFixedLengthVector())
12475 return Result;
12476 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12477}
12478
12479SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
12480 SelectionDAG &DAG) const {
12481 MVT VT = Op.getSimpleValueType();
12482 if (VT.getVectorElementType() != MVT::i1)
12483 return lowerVPOp(Op, DAG);
12484
12485 // It is safe to drop mask parameter as masked-off elements are undef.
12486 SDValue Op1 = Op->getOperand(0);
12487 SDValue Op2 = Op->getOperand(1);
12488 SDValue VL = Op->getOperand(3);
12489
12490 MVT ContainerVT = VT;
12491 const bool IsFixed = VT.isFixedLengthVector();
12492 if (IsFixed) {
12493 ContainerVT = getContainerForFixedLengthVector(VT);
12494 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12495 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12496 }
12497
12498 SDLoc DL(Op);
12499 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
12500 if (!IsFixed)
12501 return Val;
12502 return convertFromScalableVector(VT, Val, DAG, Subtarget);
12503}
12504
12505SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
12506 SelectionDAG &DAG) const {
12507 SDLoc DL(Op);
12508 MVT XLenVT = Subtarget.getXLenVT();
12509 MVT VT = Op.getSimpleValueType();
12510 MVT ContainerVT = VT;
12511 if (VT.isFixedLengthVector())
12512 ContainerVT = getContainerForFixedLengthVector(VT);
12513
12514 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12515
12516 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12517 // Check if the mask is known to be all ones
12518 SDValue Mask = VPNode->getMask();
12519 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12520
12521 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12522 : Intrinsic::riscv_vlse_mask,
12523 DL, XLenVT);
12524 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12525 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12526 VPNode->getStride()};
12527 if (!IsUnmasked) {
12528 if (VT.isFixedLengthVector()) {
12529 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12530 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12531 }
12532 Ops.push_back(Mask);
12533 }
12534 Ops.push_back(VPNode->getVectorLength());
12535 if (!IsUnmasked) {
12536 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
12537 Ops.push_back(Policy);
12538 }
12539
12540 SDValue Result =
12541      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12542                              VPNode->getMemoryVT(), VPNode->getMemOperand());
12543 SDValue Chain = Result.getValue(1);
12544
12545 if (VT.isFixedLengthVector())
12546 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12547
12548 return DAG.getMergeValues({Result, Chain}, DL);
12549}
12550
12551SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
12552 SelectionDAG &DAG) const {
12553 SDLoc DL(Op);
12554 MVT XLenVT = Subtarget.getXLenVT();
12555
12556 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12557 SDValue StoreVal = VPNode->getValue();
12558 MVT VT = StoreVal.getSimpleValueType();
12559 MVT ContainerVT = VT;
12560 if (VT.isFixedLengthVector()) {
12561 ContainerVT = getContainerForFixedLengthVector(VT);
12562 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12563 }
12564
12565 // Check if the mask is known to be all ones
12566 SDValue Mask = VPNode->getMask();
12567 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12568
12569 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12570 : Intrinsic::riscv_vsse_mask,
12571 DL, XLenVT);
12572 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12573 VPNode->getBasePtr(), VPNode->getStride()};
12574 if (!IsUnmasked) {
12575 if (VT.isFixedLengthVector()) {
12576 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12577 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12578 }
12579 Ops.push_back(Mask);
12580 }
12581 Ops.push_back(VPNode->getVectorLength());
12582
12583 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
12584 Ops, VPNode->getMemoryVT(),
12585 VPNode->getMemOperand());
12586}
12587
12588// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12589// matched to a RVV indexed load. The RVV indexed load instructions only
12590// support the "unsigned unscaled" addressing mode; indices are implicitly
12591// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12592// signed or scaled indexing is extended to the XLEN value type and scaled
12593// accordingly.
12594SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
12595 SelectionDAG &DAG) const {
12596 SDLoc DL(Op);
12597 MVT VT = Op.getSimpleValueType();
12598
12599 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12600 EVT MemVT = MemSD->getMemoryVT();
12601 MachineMemOperand *MMO = MemSD->getMemOperand();
12602 SDValue Chain = MemSD->getChain();
12603 SDValue BasePtr = MemSD->getBasePtr();
12604
12605 [[maybe_unused]] ISD::LoadExtType LoadExtType;
12606 SDValue Index, Mask, PassThru, VL;
12607
12608 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12609 Index = VPGN->getIndex();
12610 Mask = VPGN->getMask();
12611 PassThru = DAG.getUNDEF(VT);
12612 VL = VPGN->getVectorLength();
12613 // VP doesn't support extending loads.
12614 LoadExtType = ISD::NON_EXTLOAD;
12615 } else {
12616 // Else it must be a MGATHER.
12617 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12618 Index = MGN->getIndex();
12619 Mask = MGN->getMask();
12620 PassThru = MGN->getPassThru();
12621 LoadExtType = MGN->getExtensionType();
12622 }
12623
12624 MVT IndexVT = Index.getSimpleValueType();
12625 MVT XLenVT = Subtarget.getXLenVT();
12626
12628 "Unexpected VTs!");
12629 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12630 // Targets have to explicitly opt-in for extending vector loads.
12631 assert(LoadExtType == ISD::NON_EXTLOAD &&
12632 "Unexpected extending MGATHER/VP_GATHER");
12633
12634 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12635 // the selection of the masked intrinsics doesn't do this for us.
12636 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12637
12638 MVT ContainerVT = VT;
12639 if (VT.isFixedLengthVector()) {
12640 ContainerVT = getContainerForFixedLengthVector(VT);
12641 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12642 ContainerVT.getVectorElementCount());
12643
12644 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12645
12646 if (!IsUnmasked) {
12647 MVT MaskVT = getMaskTypeFor(ContainerVT);
12648 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12649 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12650 }
12651 }
12652
12653 if (!VL)
12654 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12655
12656 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12657 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12658 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12659 }
12660
12661 unsigned IntID =
12662 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12663 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12664 if (IsUnmasked)
12665 Ops.push_back(DAG.getUNDEF(ContainerVT));
12666 else
12667 Ops.push_back(PassThru);
12668 Ops.push_back(BasePtr);
12669 Ops.push_back(Index);
12670 if (!IsUnmasked)
12671 Ops.push_back(Mask);
12672 Ops.push_back(VL);
12673 if (!IsUnmasked)
12674 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
12675
12676 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12677 SDValue Result =
12678 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12679 Chain = Result.getValue(1);
12680
12681 if (VT.isFixedLengthVector())
12682 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12683
12684 return DAG.getMergeValues({Result, Chain}, DL);
12685}
12686
12687// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
12688// matched to a RVV indexed store. The RVV indexed store instructions only
12689// support the "unsigned unscaled" addressing mode; indices are implicitly
12690// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12691// signed or scaled indexing is extended to the XLEN value type and scaled
12692// accordingly.
12693SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
12694 SelectionDAG &DAG) const {
12695 SDLoc DL(Op);
12696 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12697 EVT MemVT = MemSD->getMemoryVT();
12698 MachineMemOperand *MMO = MemSD->getMemOperand();
12699 SDValue Chain = MemSD->getChain();
12700 SDValue BasePtr = MemSD->getBasePtr();
12701
12702 [[maybe_unused]] bool IsTruncatingStore = false;
12703 SDValue Index, Mask, Val, VL;
12704
12705 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
12706 Index = VPSN->getIndex();
12707 Mask = VPSN->getMask();
12708 Val = VPSN->getValue();
12709 VL = VPSN->getVectorLength();
12710 // VP doesn't support truncating stores.
12711 IsTruncatingStore = false;
12712 } else {
12713 // Else it must be a MSCATTER.
12714 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12715 Index = MSN->getIndex();
12716 Mask = MSN->getMask();
12717 Val = MSN->getValue();
12718 IsTruncatingStore = MSN->isTruncatingStore();
12719 }
12720
12721 MVT VT = Val.getSimpleValueType();
12722 MVT IndexVT = Index.getSimpleValueType();
12723 MVT XLenVT = Subtarget.getXLenVT();
12724
12726 "Unexpected VTs!");
12727 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12728 // Targets have to explicitly opt-in for extending vector loads and
12729 // truncating vector stores.
12730 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
12731
12732 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12733 // the selection of the masked intrinsics doesn't do this for us.
12734 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12735
12736 MVT ContainerVT = VT;
12737 if (VT.isFixedLengthVector()) {
12738 ContainerVT = getContainerForFixedLengthVector(VT);
12739 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12740 ContainerVT.getVectorElementCount());
12741
12742 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12743 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12744
12745 if (!IsUnmasked) {
12746 MVT MaskVT = getMaskTypeFor(ContainerVT);
12747 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12748 }
12749 }
12750
12751 if (!VL)
12752 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12753
12754 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12755 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12756 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12757 }
12758
12759 unsigned IntID =
12760 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12761 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12762 Ops.push_back(Val);
12763 Ops.push_back(BasePtr);
12764 Ops.push_back(Index);
12765 if (!IsUnmasked)
12766 Ops.push_back(Mask);
12767 Ops.push_back(VL);
12768
12769 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
12770 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12771}
12772
12773SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
12774 SelectionDAG &DAG) const {
12775 const MVT XLenVT = Subtarget.getXLenVT();
12776 SDLoc DL(Op);
12777 SDValue Chain = Op->getOperand(0);
12778 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12779 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12780 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
12781
12782 // The encoding used for the rounding mode in RISC-V differs from that used in
12783 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index into a
12784 // table, which consists of a sequence of 4-bit fields, each representing the
12785 // corresponding FLT_ROUNDS mode.
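// For illustration, assuming the standard frm encoding (RNE=0, RTZ=1, RDN=2,
// RUP=3, RMM=4) and the FLT_ROUNDS values (0=toward zero, 1=nearest-even,
// 2=toward +inf, 3=toward -inf, 4=nearest-away): reading frm==RTZ selects
// nibble 1 of the table, which holds RoundingMode::TowardZero (0), i.e.
//   (Table >> (1 * 4)) & 7 == 0.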
12786 static const int Table =
12787 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
12788 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
12789 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
12790 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
12791 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
12792
12793 SDValue Shift =
12794 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
12795 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12796 DAG.getConstant(Table, DL, XLenVT), Shift);
12797 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12798 DAG.getConstant(7, DL, XLenVT));
12799
12800 return DAG.getMergeValues({Masked, Chain}, DL);
12801}
12802
12803SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
12804 SelectionDAG &DAG) const {
12805 const MVT XLenVT = Subtarget.getXLenVT();
12806 SDLoc DL(Op);
12807 SDValue Chain = Op->getOperand(0);
12808 SDValue RMValue = Op->getOperand(1);
12809 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12810
12811 // The encoding used for the rounding mode in RISC-V differs from that used in
12812 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index into
12813 // a table, which consists of a sequence of 4-bit fields, each representing the
12814 // corresponding RISC-V mode.
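// For illustration (same encodings as in lowerGET_ROUNDING above): writing the
// FLT_ROUNDS value 0 (toward zero) selects nibble 0 of the table, which holds
// RISCVFPRndMode::RTZ, the frm value that is then written to the CSR.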
12815 static const unsigned Table =
12816 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
12817 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
12818 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
12819 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
12820 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
12821
12822 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12823
12824 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12825 DAG.getConstant(2, DL, XLenVT));
12826 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12827 DAG.getConstant(Table, DL, XLenVT), Shift);
12828 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12829 DAG.getConstant(0x7, DL, XLenVT));
12830 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12831 RMValue);
12832}
12833
12834SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12835 SelectionDAG &DAG) const {
12836 MachineFunction &MF = DAG.getMachineFunction();
12837
12838 bool isRISCV64 = Subtarget.is64Bit();
12839 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12840
12841 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12842 return DAG.getFrameIndex(FI, PtrVT);
12843}
12844
12845// Returns the opcode of the target-specific SDNode that implements the 32-bit
12846// form of the given Opcode.
12847static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12848 switch (Opcode) {
12849 default:
12850 llvm_unreachable("Unexpected opcode");
12851 case ISD::SHL:
12852 return RISCVISD::SLLW;
12853 case ISD::SRA:
12854 return RISCVISD::SRAW;
12855 case ISD::SRL:
12856 return RISCVISD::SRLW;
12857 case ISD::SDIV:
12858 return RISCVISD::DIVW;
12859 case ISD::UDIV:
12860 return RISCVISD::DIVUW;
12861 case ISD::UREM:
12862 return RISCVISD::REMUW;
12863 case ISD::ROTL:
12864 return RISCVISD::ROLW;
12865 case ISD::ROTR:
12866 return RISCVISD::RORW;
12867 }
12868}
12869
12870// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12871// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12872// otherwise be promoted to i64, making it difficult to select the
12873 // SLLW/DIVUW/.../*W later on, because the fact that the operation was
12874 // originally of type i8/i16/i32 is lost.
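// As a sketch of the effect: on RV64, (srl i32 %x, %y) becomes
//   (trunc i32 (RISCVISD::SRLW (any_extend i64 %x), (any_extend i64 %y)))
// so that SRLW, which only reads the low 32 bits of its inputs, can still be
// selected.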
12875 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12876 unsigned ExtOpc = ISD::ANY_EXTEND) {
12877 SDLoc DL(N);
12878 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12879 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12880 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12881 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12882 // ReplaceNodeResults requires we maintain the same type for the return value.
12883 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12884}
12885
12886 // Converts the given 32-bit operation to an i64 operation with sign-extension
12887 // semantics, to reduce the number of sign-extension instructions.
12888 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12889 SDLoc DL(N);
12890 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12891 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12892 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12893 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12894 DAG.getValueType(MVT::i32));
12895 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12896}
12897
12898 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12899 SmallVectorImpl<SDValue> &Results,
12900 SelectionDAG &DAG) const {
12901 SDLoc DL(N);
12902 switch (N->getOpcode()) {
12903 default:
12904 llvm_unreachable("Don't know how to custom type legalize this operation!");
12905 case ISD::STRICT_FP_TO_SINT:
12906 case ISD::STRICT_FP_TO_UINT:
12907 case ISD::FP_TO_SINT:
12908 case ISD::FP_TO_UINT: {
12909 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12910 "Unexpected custom legalisation");
12911 bool IsStrict = N->isStrictFPOpcode();
12912 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12913 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12914 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12915 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12916 TargetLowering::TypeSoftenFloat) {
12917 if (!isTypeLegal(Op0.getValueType()))
12918 return;
12919 if (IsStrict) {
12920 SDValue Chain = N->getOperand(0);
12921 // In the absence of Zfh, promote f16 to f32, then convert.
12922 if (Op0.getValueType() == MVT::f16 &&
12923 !Subtarget.hasStdExtZfhOrZhinx()) {
12924 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12925 {Chain, Op0});
12926 Chain = Op0.getValue(1);
12927 }
12928 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12929 : RISCVISD::STRICT_FCVT_WU_RV64;
12930 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12931 SDValue Res = DAG.getNode(
12932 Opc, DL, VTs, Chain, Op0,
12933 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12934 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12935 Results.push_back(Res.getValue(1));
12936 return;
12937 }
12938 // For bf16, or for f16 in the absence of Zfh, promote [b]f16 to f32 and then
12939 // convert.
12940 if ((Op0.getValueType() == MVT::f16 &&
12941 !Subtarget.hasStdExtZfhOrZhinx()) ||
12942 Op0.getValueType() == MVT::bf16)
12943 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12944
12945 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12946 SDValue Res =
12947 DAG.getNode(Opc, DL, MVT::i64, Op0,
12948 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12949 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12950 return;
12951 }
12952 // If the FP type needs to be softened, emit a library call using the 'si'
12953 // version. If we left it to default legalization we'd end up with 'di'. If
12954 // the FP type doesn't need to be softened just let generic type
12955 // legalization promote the result type.
12956 RTLIB::Libcall LC;
12957 if (IsSigned)
12958 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12959 else
12960 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12961 MakeLibCallOptions CallOptions;
12962 EVT OpVT = Op0.getValueType();
12963 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12964 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12965 SDValue Result;
12966 std::tie(Result, Chain) =
12967 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12968 Results.push_back(Result);
12969 if (IsStrict)
12970 Results.push_back(Chain);
12971 break;
12972 }
12973 case ISD::LROUND: {
12974 SDValue Op0 = N->getOperand(0);
12975 EVT Op0VT = Op0.getValueType();
12976 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12977 TargetLowering::TypeSoftenFloat) {
12978 if (!isTypeLegal(Op0VT))
12979 return;
12980
12981 // In the absence of Zfh, promote f16 to f32, then convert.
12982 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12983 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12984
12985 SDValue Res =
12986 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12987 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12988 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12989 return;
12990 }
12991 // If the FP type needs to be softened, emit a library call to lround. We'll
12992 // need to truncate the result. We assume any value that doesn't fit in i32
12993 // is allowed to return an unspecified value.
12994 RTLIB::Libcall LC =
12995 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12996 MakeLibCallOptions CallOptions;
12997 EVT OpVT = Op0.getValueType();
12998 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12999 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
13000 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
13001 Results.push_back(Result);
13002 break;
13003 }
13004 case ISD::READCYCLECOUNTER:
13005 case ISD::READSTEADYCOUNTER: {
13006 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
13007 "has custom type legalization on riscv32");
13008
13009 SDValue LoCounter, HiCounter;
13010 MVT XLenVT = Subtarget.getXLenVT();
13011 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
13012 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
13013 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
13014 } else {
13015 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
13016 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
13017 }
13018 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
13019 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
13020 N->getOperand(0), LoCounter, HiCounter);
13021
13022 Results.push_back(
13023 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
13024 Results.push_back(RCW.getValue(2));
13025 break;
13026 }
13027 case ISD::LOAD: {
13028 if (!ISD::isNON_EXTLoad(N))
13029 return;
13030
13031 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
13032 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
13033 LoadSDNode *Ld = cast<LoadSDNode>(N);
13034
13035 SDLoc dl(N);
13036 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
13037 Ld->getBasePtr(), Ld->getMemoryVT(),
13038 Ld->getMemOperand());
13039 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
13040 Results.push_back(Res.getValue(1));
13041 return;
13042 }
13043 case ISD::MUL: {
13044 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
13045 unsigned XLen = Subtarget.getXLen();
13046 // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
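// As a sketch: for an i128 multiply on RV64 where one operand is known to have
// its upper 64 bits zero (unsigned) and the other is sign-extended from i64,
// the product is rebuilt from the truncated halves S and U as
//   lo = mul S, U;  hi = RISCVISD::MULHSU S, U;  result = build_pair lo, hi.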
13047 if (Size > XLen) {
13048 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
13049 SDValue LHS = N->getOperand(0);
13050 SDValue RHS = N->getOperand(1);
13051 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
13052
13053 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
13054 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
13055 // We need exactly one side to be unsigned.
13056 if (LHSIsU == RHSIsU)
13057 return;
13058
13059 auto MakeMULPair = [&](SDValue S, SDValue U) {
13060 MVT XLenVT = Subtarget.getXLenVT();
13061 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
13062 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
13063 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
13064 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
13065 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
13066 };
13067
13068 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
13069 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
13070
13071 // The other operand should be signed, but still prefer MULH when
13072 // possible.
13073 if (RHSIsU && LHSIsS && !RHSIsS)
13074 Results.push_back(MakeMULPair(LHS, RHS));
13075 else if (LHSIsU && RHSIsS && !LHSIsS)
13076 Results.push_back(MakeMULPair(RHS, LHS));
13077
13078 return;
13079 }
13080 [[fallthrough]];
13081 }
13082 case ISD::ADD:
13083 case ISD::SUB:
13084 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13085 "Unexpected custom legalisation");
13086 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
13087 break;
13088 case ISD::SHL:
13089 case ISD::SRA:
13090 case ISD::SRL:
13091 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13092 "Unexpected custom legalisation");
13093 if (N->getOperand(1).getOpcode() != ISD::Constant) {
13094 // If we can use a BSET instruction, allow default promotion to apply.
13095 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
13096 isOneConstant(N->getOperand(0)))
13097 break;
13098 Results.push_back(customLegalizeToWOp(N, DAG));
13099 break;
13100 }
13101
13102 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
13103 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
13104 // shift amount.
13105 if (N->getOpcode() == ISD::SHL) {
13106 SDLoc DL(N);
13107 SDValue NewOp0 =
13108 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13109 SDValue NewOp1 =
13110 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
13111 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
13112 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
13113 DAG.getValueType(MVT::i32));
13114 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13115 }
13116
13117 break;
13118 case ISD::ROTL:
13119 case ISD::ROTR:
13120 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13121 "Unexpected custom legalisation");
13122 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13123 Subtarget.hasVendorXTHeadBb()) &&
13124 "Unexpected custom legalization");
13125 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
13126 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13127 return;
13128 Results.push_back(customLegalizeToWOp(N, DAG));
13129 break;
13130 case ISD::CTTZ:
13131 case ISD::CTTZ_ZERO_UNDEF:
13132 case ISD::CTLZ:
13133 case ISD::CTLZ_ZERO_UNDEF: {
13134 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13135 "Unexpected custom legalisation");
13136
13137 SDValue NewOp0 =
13138 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13139 bool IsCTZ =
13140 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
13141 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
13142 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
13143 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13144 return;
13145 }
13146 case ISD::SDIV:
13147 case ISD::UDIV:
13148 case ISD::UREM: {
13149 MVT VT = N->getSimpleValueType(0);
13150 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13151 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
13152 "Unexpected custom legalisation");
13153 // Don't promote division/remainder by constant since we should expand those
13154 // to multiply by magic constant.
13155 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
13156 if (N->getOperand(1).getOpcode() == ISD::Constant &&
13157 !isIntDivCheap(N->getValueType(0), Attr))
13158 return;
13159
13160 // If the input is i32, use ANY_EXTEND since the W instructions don't read
13161 // the upper 32 bits. For other types we need to sign or zero extend
13162 // based on the opcode.
13163 unsigned ExtOpc = ISD::ANY_EXTEND;
13164 if (VT != MVT::i32)
13165 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
13166 : ISD::ZERO_EXTEND;
13167
13168 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
13169 break;
13170 }
13171 case ISD::SADDO: {
13172 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13173 "Unexpected custom legalisation");
13174
13175 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
13176 // use the default legalization.
13177 if (!isa<ConstantSDNode>(N->getOperand(1)))
13178 return;
13179
13180 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13181 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
13182 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
13183 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13184 DAG.getValueType(MVT::i32));
13185
13186 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13187
13188 // For an addition, the result should be less than one of the operands (LHS)
13189 // if and only if the other operand (RHS) is negative, otherwise there will
13190 // be overflow.
13191 // For a subtraction, the result should be less than one of the operands
13192 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
13193 // otherwise there will be overflow.
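// Worked example: i32 0x7fffffff + 1. The sign-extended i64 add followed by
// sext_inreg yields a negative Res, so Res < LHS even though RHS (1) is not
// negative; the XOR below therefore reports overflow, as expected.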
13194 EVT OType = N->getValueType(1);
13195 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
13196 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
13197
13198 SDValue Overflow =
13199 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
13200 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13201 Results.push_back(Overflow);
13202 return;
13203 }
13204 case ISD::UADDO:
13205 case ISD::USUBO: {
13206 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13207 "Unexpected custom legalisation");
13208 bool IsAdd = N->getOpcode() == ISD::UADDO;
13209 // Create an ADDW or SUBW.
13210 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13211 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13212 SDValue Res =
13213 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
13214 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13215 DAG.getValueType(MVT::i32));
13216
13217 SDValue Overflow;
13218 if (IsAdd && isOneConstant(RHS)) {
13219 // Special case uaddo X, 1 overflowed if the addition result is 0.
13220 // The general case (X + C) < C is not necessarily beneficial. Although we
13221 // reduce the live range of X, we may introduce the materialization of
13222 // constant C, especially when the setcc result is used by a branch. We have
13223 // no compare-with-constant-and-branch instructions.
13224 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
13225 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
13226 } else if (IsAdd && isAllOnesConstant(RHS)) {
13227 // Special case uaddo X, -1 overflowed if X != 0.
13228 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
13229 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
13230 } else {
13231 // Sign extend the LHS and perform an unsigned compare with the ADDW
13232 // result. Since the inputs are sign extended from i32, this is equivalent
13233 // to comparing the lower 32 bits.
13234 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13235 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
13236 IsAdd ? ISD::SETULT : ISD::SETUGT);
13237 }
13238
13239 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13240 Results.push_back(Overflow);
13241 return;
13242 }
13243 case ISD::UADDSAT:
13244 case ISD::USUBSAT: {
13245 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13246 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
13247 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
13248 // promotion for UADDO/USUBO.
13249 Results.push_back(expandAddSubSat(N, DAG));
13250 return;
13251 }
13252 case ISD::SADDSAT:
13253 case ISD::SSUBSAT: {
13254 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13255 "Unexpected custom legalisation");
13256 Results.push_back(expandAddSubSat(N, DAG));
13257 return;
13258 }
13259 case ISD::ABS: {
13260 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13261 "Unexpected custom legalisation");
13262
13263 if (Subtarget.hasStdExtZbb()) {
13264 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
13265 // This allows us to remember that the result is sign extended. Expanding
13266 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
13267 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
13268 N->getOperand(0));
13269 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
13270 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
13271 return;
13272 }
13273
13274 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
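// For example, X = -5: Y = -1 (all sign bits), xor(X, Y) = 4 (the bitwise not
// of -5), and 4 - (-1) = 5 = |X|. For non-negative X, Y = 0 and the value
// passes through unchanged.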
13275 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13276
13277 // Freeze the source so we can increase its use count.
13278 Src = DAG.getFreeze(Src);
13279
13280 // Copy sign bit to all bits using the sraiw pattern.
13281 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
13282 DAG.getValueType(MVT::i32));
13283 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
13284 DAG.getConstant(31, DL, MVT::i64));
13285
13286 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
13287 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
13288
13289 // NOTE: The result is only required to be anyextended, but sext is
13290 // consistent with type legalization of sub.
13291 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
13292 DAG.getValueType(MVT::i32));
13293 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13294 return;
13295 }
13296 case ISD::BITCAST: {
13297 EVT VT = N->getValueType(0);
13298 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
13299 SDValue Op0 = N->getOperand(0);
13300 EVT Op0VT = Op0.getValueType();
13301 MVT XLenVT = Subtarget.getXLenVT();
13302 if (VT == MVT::i16 &&
13303 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
13304 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13305 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
13306 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
13307 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
13308 Subtarget.hasStdExtFOrZfinx()) {
13309 SDValue FPConv =
13310 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
13311 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
13312 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
13313 Subtarget.hasStdExtDOrZdinx()) {
13314 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
13315 DAG.getVTList(MVT::i32, MVT::i32), Op0);
13316 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
13317 NewReg.getValue(0), NewReg.getValue(1));
13318 Results.push_back(RetReg);
13319 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
13320 isTypeLegal(Op0VT)) {
13321 // Custom-legalize bitcasts from fixed-length vector types to illegal
13322 // scalar types in order to improve codegen. Bitcast the vector to a
13323 // one-element vector type whose element type is the same as the result
13324 // type, and extract the first element.
13325 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
13326 if (isTypeLegal(BVT)) {
13327 SDValue BVec = DAG.getBitcast(BVT, Op0);
13328 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
13329 DAG.getVectorIdxConstant(0, DL)));
13330 }
13331 }
13332 break;
13333 }
13334 case RISCVISD::BREV8:
13335 case RISCVISD::ORC_B: {
13336 MVT VT = N->getSimpleValueType(0);
13337 MVT XLenVT = Subtarget.getXLenVT();
13338 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
13339 "Unexpected custom legalisation");
13340 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
13341 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
13342 "Unexpected extension");
13343 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
13344 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
13345 // ReplaceNodeResults requires we maintain the same type for the return
13346 // value.
13347 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
13348 break;
13349 }
13350 case ISD::EXTRACT_VECTOR_ELT: {
13351 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
13352 // type is illegal (currently only vXi64 RV32).
13353 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
13354 // transferred to the destination register. We issue two of these from the
13355 // upper- and lower- halves of the SEW-bit vector element, slid down to the
13356 // first element.
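// For example, extracting element 2 of a v4i64 on RV32: slide the vector down
// by 2, vmv.x.s yields the low 32 bits, then a vector shift right by 32
// followed by a second vmv.x.s yields the high 32 bits; the two halves are
// recombined with BUILD_PAIR into the i64 result.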
13357 SDValue Vec = N->getOperand(0);
13358 SDValue Idx = N->getOperand(1);
13359
13360 // The vector type hasn't been legalized yet so we can't issue target
13361 // specific nodes if it needs legalization.
13362 // FIXME: We would manually legalize if it's important.
13363 if (!isTypeLegal(Vec.getValueType()))
13364 return;
13365
13366 MVT VecVT = Vec.getSimpleValueType();
13367
13368 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
13369 VecVT.getVectorElementType() == MVT::i64 &&
13370 "Unexpected EXTRACT_VECTOR_ELT legalization");
13371
13372 // If this is a fixed vector, we need to convert it to a scalable vector.
13373 MVT ContainerVT = VecVT;
13374 if (VecVT.isFixedLengthVector()) {
13375 ContainerVT = getContainerForFixedLengthVector(VecVT);
13376 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
13377 }
13378
13379 MVT XLenVT = Subtarget.getXLenVT();
13380
13381 // Use a VL of 1 to avoid processing more elements than we need.
13382 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
13383
13384 // Unless the index is known to be 0, we must slide the vector down to get
13385 // the desired element into index 0.
13386 if (!isNullConstant(Idx)) {
13387 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13388 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
13389 }
13390
13391 // Extract the lower XLEN bits of the correct vector element.
13392 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13393
13394 // To extract the upper XLEN bits of the vector element, shift the first
13395 // element right by 32 bits and re-extract the lower XLEN bits.
13396 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13397 DAG.getUNDEF(ContainerVT),
13398 DAG.getConstant(32, DL, XLenVT), VL);
13399 SDValue LShr32 =
13400 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
13401 DAG.getUNDEF(ContainerVT), Mask, VL);
13402
13403 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13404
13405 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13406 break;
13407 }
13408 case ISD::INTRINSIC_WO_CHAIN: {
13409 unsigned IntNo = N->getConstantOperandVal(0);
13410 switch (IntNo) {
13411 default:
13413 "Don't know how to custom type legalize this intrinsic!");
13414 case Intrinsic::experimental_get_vector_length: {
13415 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
13416 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13417 return;
13418 }
13419 case Intrinsic::experimental_cttz_elts: {
13420 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
13421 Results.push_back(
13422 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
13423 return;
13424 }
13425 case Intrinsic::riscv_orc_b:
13426 case Intrinsic::riscv_brev8:
13427 case Intrinsic::riscv_sha256sig0:
13428 case Intrinsic::riscv_sha256sig1:
13429 case Intrinsic::riscv_sha256sum0:
13430 case Intrinsic::riscv_sha256sum1:
13431 case Intrinsic::riscv_sm3p0:
13432 case Intrinsic::riscv_sm3p1: {
13433 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13434 return;
13435 unsigned Opc;
13436 switch (IntNo) {
13437 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
13438 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
13439 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
13440 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
13441 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
13442 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
13443 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
13444 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
13445 }
13446
13447 SDValue NewOp =
13448 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13449 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
13450 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13451 return;
13452 }
13453 case Intrinsic::riscv_sm4ks:
13454 case Intrinsic::riscv_sm4ed: {
13455 unsigned Opc =
13456 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
13457 SDValue NewOp0 =
13458 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13459 SDValue NewOp1 =
13460 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13461 SDValue Res =
13462 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
13463 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13464 return;
13465 }
13466 case Intrinsic::riscv_mopr: {
13467 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13468 return;
13469 SDValue NewOp =
13470 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13471 SDValue Res = DAG.getNode(
13472 RISCVISD::MOPR, DL, MVT::i64, NewOp,
13473 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
13474 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13475 return;
13476 }
13477 case Intrinsic::riscv_moprr: {
13478 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13479 return;
13480 SDValue NewOp0 =
13481 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13482 SDValue NewOp1 =
13483 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13484 SDValue Res = DAG.getNode(
13485 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
13486 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
13487 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13488 return;
13489 }
13490 case Intrinsic::riscv_clmul: {
13491 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13492 return;
13493
13494 SDValue NewOp0 =
13495 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13496 SDValue NewOp1 =
13497 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13498 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
13499 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13500 return;
13501 }
13502 case Intrinsic::riscv_clmulh:
13503 case Intrinsic::riscv_clmulr: {
13504 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13505 return;
13506
13507 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
13508 // to the full 128-bit clmul result of multiplying two xlen values.
13509 // Perform clmulr or clmulh on the shifted values. Finally, extract the
13510 // upper 32 bits.
13511 //
13512 // The alternative is to mask the inputs to 32 bits and use clmul, but
13513 // that requires two shifts to mask each input without zext.w.
13514 // FIXME: If the inputs are known zero extended or could be freely
13515 // zero extended, the mask form would be better.
13516 SDValue NewOp0 =
13517 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13518 SDValue NewOp1 =
13519 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13520 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
13521 DAG.getConstant(32, DL, MVT::i64));
13522 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
13523 DAG.getConstant(32, DL, MVT::i64));
13524 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
13525 : RISCVISD::CLMULR;
13526 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
13527 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
13528 DAG.getConstant(32, DL, MVT::i64));
13529 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13530 return;
13531 }
13532 case Intrinsic::riscv_vmv_x_s: {
13533 EVT VT = N->getValueType(0);
13534 MVT XLenVT = Subtarget.getXLenVT();
13535 if (VT.bitsLT(XLenVT)) {
13536 // Simple case just extract using vmv.x.s and truncate.
13537 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
13538 Subtarget.getXLenVT(), N->getOperand(1));
13539 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
13540 return;
13541 }
13542
13543 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
13544 "Unexpected custom legalization");
13545
13546 // We need to do the move in two steps.
13547 SDValue Vec = N->getOperand(1);
13548 MVT VecVT = Vec.getSimpleValueType();
13549
13550 // First extract the lower XLEN bits of the element.
13551 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13552
13553 // To extract the upper XLEN bits of the vector element, shift the first
13554 // element right by 32 bits and re-extract the lower XLEN bits.
13555 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
13556
13557 SDValue ThirtyTwoV =
13558 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
13559 DAG.getConstant(32, DL, XLenVT), VL);
13560 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
13561 DAG.getUNDEF(VecVT), Mask, VL);
13562 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13563
13564 Results.push_back(
13565 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13566 break;
13567 }
13568 }
13569 break;
13570 }
13571 case ISD::VECREDUCE_ADD:
13572 case ISD::VECREDUCE_AND:
13573 case ISD::VECREDUCE_OR:
13574 case ISD::VECREDUCE_XOR:
13575 case ISD::VECREDUCE_SMAX:
13576 case ISD::VECREDUCE_UMAX:
13577 case ISD::VECREDUCE_SMIN:
13578 case ISD::VECREDUCE_UMIN:
13579 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
13580 Results.push_back(V);
13581 break;
13582 case ISD::VP_REDUCE_ADD:
13583 case ISD::VP_REDUCE_AND:
13584 case ISD::VP_REDUCE_OR:
13585 case ISD::VP_REDUCE_XOR:
13586 case ISD::VP_REDUCE_SMAX:
13587 case ISD::VP_REDUCE_UMAX:
13588 case ISD::VP_REDUCE_SMIN:
13589 case ISD::VP_REDUCE_UMIN:
13590 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
13591 Results.push_back(V);
13592 break;
13593 case ISD::GET_ROUNDING: {
13594 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
13595 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
13596 Results.push_back(Res.getValue(0));
13597 Results.push_back(Res.getValue(1));
13598 break;
13599 }
13600 }
13601}
13602
13603/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13604/// which corresponds to it.
13605static unsigned getVecReduceOpcode(unsigned Opc) {
13606 switch (Opc) {
13607 default:
13608 llvm_unreachable("Unhandled binary to transfrom reduction");
13609 case ISD::ADD:
13610 return ISD::VECREDUCE_ADD;
13611 case ISD::UMAX:
13612 return ISD::VECREDUCE_UMAX;
13613 case ISD::SMAX:
13614 return ISD::VECREDUCE_SMAX;
13615 case ISD::UMIN:
13616 return ISD::VECREDUCE_UMIN;
13617 case ISD::SMIN:
13618 return ISD::VECREDUCE_SMIN;
13619 case ISD::AND:
13620 return ISD::VECREDUCE_AND;
13621 case ISD::OR:
13622 return ISD::VECREDUCE_OR;
13623 case ISD::XOR:
13624 return ISD::VECREDUCE_XOR;
13625 case ISD::FADD:
13626 // Note: This is the associative form of the generic reduction opcode.
13627 return ISD::VECREDUCE_FADD;
13628 }
13629}
13630
13631/// Perform two related transforms whose purpose is to incrementally recognize
13632/// an explode_vector followed by scalar reduction as a vector reduction node.
13633/// This exists to recover from a deficiency in SLP which can't handle
13634/// forests with multiple roots sharing common nodes. In some cases, one
13635/// of the trees will be vectorized, and the other will remain (unprofitably)
13636/// scalarized.
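/// For example, (add (extract_vector_elt v, 0), (extract_vector_elt v, 1)) is
/// first rewritten as a vecreduce over the first two lanes of v; a later
/// (add (that reduction), (extract_vector_elt v, 2)) then grows the reduction
/// to cover the first three lanes, and so on up the scalar chain.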
13637static SDValue
13638 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
13639 const RISCVSubtarget &Subtarget) {
13640
13641 // This transform needs to run before all integer types have been legalized
13642 // to i64 (so that the vector element type matches the add type), and while
13643 // it's safe to introduce odd-sized vector types.
13644 if (DAG.NewNodesMustHaveLegalTypes)
13645 return SDValue();
13646
13647 // Without V, this transform isn't useful. We could form the (illegal)
13648 // operations and let them be scalarized again, but there's really no point.
13649 if (!Subtarget.hasVInstructions())
13650 return SDValue();
13651
13652 const SDLoc DL(N);
13653 const EVT VT = N->getValueType(0);
13654 const unsigned Opc = N->getOpcode();
13655
13656 // For FADD, we only handle the case with reassociation allowed. We
13657 // could handle strict reduction order, but at the moment, there's no
13658 // known reason to, and the complexity isn't worth it.
13659 // TODO: Handle fminnum and fmaxnum here
13660 if (!VT.isInteger() &&
13661 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
13662 return SDValue();
13663
13664 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
13665 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
13666 "Inconsistent mappings");
13667 SDValue LHS = N->getOperand(0);
13668 SDValue RHS = N->getOperand(1);
13669
13670 if (!LHS.hasOneUse() || !RHS.hasOneUse())
13671 return SDValue();
13672
13673 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13674 std::swap(LHS, RHS);
13675
13676 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13677 !isa<ConstantSDNode>(RHS.getOperand(1)))
13678 return SDValue();
13679
13680 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
13681 SDValue SrcVec = RHS.getOperand(0);
13682 EVT SrcVecVT = SrcVec.getValueType();
13683 assert(SrcVecVT.getVectorElementType() == VT);
13684 if (SrcVecVT.isScalableVector())
13685 return SDValue();
13686
13687 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
13688 return SDValue();
13689
13690 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13691 // reduce_op (extract_subvector [2 x VT] from V). This will form the
13692 // root of our reduction tree. TODO: We could extend this to any two
13693 // adjacent aligned constant indices if desired.
13694 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13695 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13696 uint64_t LHSIdx =
13697 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13698 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13699 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
13700 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13701 DAG.getVectorIdxConstant(0, DL));
13702 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
13703 }
13704 }
13705
13706 // Match (binop (reduce (extract_subvector V, 0),
13707 // (extract_vector_elt V, sizeof(SubVec))))
13708 // into a reduction of one more element from the original vector V.
13709 if (LHS.getOpcode() != ReduceOpc)
13710 return SDValue();
13711
13712 SDValue ReduceVec = LHS.getOperand(0);
13713 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13714 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
13715 isNullConstant(ReduceVec.getOperand(1)) &&
13716 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13717 // For illegal types (e.g. 3xi32), most will be combined again into a
13718 // wider (hopefully legal) type. If this is a terminal state, we are
13719 // relying on type legalization here to produce something reasonable
13720 // and this lowering quality could probably be improved. (TODO)
13721 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13722 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13723 DAG.getVectorIdxConstant(0, DL));
13724 return DAG.getNode(ReduceOpc, DL, VT, Vec,
13725 ReduceVec->getFlags() & N->getFlags());
13726 }
13727
13728 return SDValue();
13729}
13730
13731
13732// Try to fold (<bop> x, (reduction.<bop> vec, start))
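// In effect, when the reduction's current start value is the neutral element of
// <bop>, x is folded in as the new start value and the outer <bop> goes away.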
13733 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
13734 const RISCVSubtarget &Subtarget) {
13735 auto BinOpToRVVReduce = [](unsigned Opc) {
13736 switch (Opc) {
13737 default:
13738 llvm_unreachable("Unhandled binary to transfrom reduction");
13739 case ISD::ADD:
13740 return RISCVISD::VECREDUCE_ADD_VL;
13741 case ISD::UMAX:
13742 return RISCVISD::VECREDUCE_UMAX_VL;
13743 case ISD::SMAX:
13744 return RISCVISD::VECREDUCE_SMAX_VL;
13745 case ISD::UMIN:
13746 return RISCVISD::VECREDUCE_UMIN_VL;
13747 case ISD::SMIN:
13748 return RISCVISD::VECREDUCE_SMIN_VL;
13749 case ISD::AND:
13750 return RISCVISD::VECREDUCE_AND_VL;
13751 case ISD::OR:
13752 return RISCVISD::VECREDUCE_OR_VL;
13753 case ISD::XOR:
13754 return RISCVISD::VECREDUCE_XOR_VL;
13755 case ISD::FADD:
13756 return RISCVISD::VECREDUCE_FADD_VL;
13757 case ISD::FMAXNUM:
13758 return RISCVISD::VECREDUCE_FMAX_VL;
13759 case ISD::FMINNUM:
13760 return RISCVISD::VECREDUCE_FMIN_VL;
13761 }
13762 };
13763
13764 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
13765 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13766 isNullConstant(V.getOperand(1)) &&
13767 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13768 };
13769
13770 unsigned Opc = N->getOpcode();
13771 unsigned ReduceIdx;
13772 if (IsReduction(N->getOperand(0), Opc))
13773 ReduceIdx = 0;
13774 else if (IsReduction(N->getOperand(1), Opc))
13775 ReduceIdx = 1;
13776 else
13777 return SDValue();
13778
13779 // Skip if this is an FADD that disallows reassociation, since this combine requires it.
13780 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
13781 return SDValue();
13782
13783 SDValue Extract = N->getOperand(ReduceIdx);
13784 SDValue Reduce = Extract.getOperand(0);
13785 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13786 return SDValue();
13787
13788 SDValue ScalarV = Reduce.getOperand(2);
13789 EVT ScalarVT = ScalarV.getValueType();
13790 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
13791 ScalarV.getOperand(0)->isUndef() &&
13792 isNullConstant(ScalarV.getOperand(2)))
13793 ScalarV = ScalarV.getOperand(1);
13794
13795 // Make sure that ScalarV is a splat with VL=1.
13796 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
13797 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
13798 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
13799 return SDValue();
13800
13801 if (!isNonZeroAVL(ScalarV.getOperand(2)))
13802 return SDValue();
13803
13804 // Check that the scalar of ScalarV is the neutral element.
13805 // TODO: Deal with value other than neutral element.
13806 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13807 0))
13808 return SDValue();
13809
13810 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13811 // FIXME: We might be able to improve this if operand 0 is undef.
13812 if (!isNonZeroAVL(Reduce.getOperand(5)))
13813 return SDValue();
13814
13815 SDValue NewStart = N->getOperand(1 - ReduceIdx);
13816
13817 SDLoc DL(N);
13818 SDValue NewScalarV =
13819 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13820 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13821
13822 // If we looked through an INSERT_SUBVECTOR we need to restore it.
13823 if (ScalarVT != ScalarV.getValueType())
13824 NewScalarV =
13825 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13826 NewScalarV, DAG.getVectorIdxConstant(0, DL));
13827
13828 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13829 NewScalarV, Reduce.getOperand(3),
13830 Reduce.getOperand(4), Reduce.getOperand(5)};
13831 SDValue NewReduce =
13832 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13833 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13834 Extract.getOperand(1));
13835}
13836
13837// Optimize (add (shl x, c0), (shl y, c1)) ->
13838 // (SLLI (SH*ADD x, y), c0), if c1-c0 is 1, 2, or 3.
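// For example, with Zba: (add (shl x, 5), (shl y, 7)) becomes
//   (shl (sh2add y, x), 5)
// since (x << 5) + (y << 7) == ((y << 2) + x) << 5.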
13839 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
13840 const RISCVSubtarget &Subtarget) {
13841 // Perform this optimization only in the zba extension.
13842 if (!Subtarget.hasStdExtZba())
13843 return SDValue();
13844
13845 // Skip for vector types and larger types.
13846 EVT VT = N->getValueType(0);
13847 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13848 return SDValue();
13849
13850 // The two operand nodes must be SHL and have no other use.
13851 SDValue N0 = N->getOperand(0);
13852 SDValue N1 = N->getOperand(1);
13853 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13854 !N0->hasOneUse() || !N1->hasOneUse())
13855 return SDValue();
13856
13857 // Check c0 and c1.
13858 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13859 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13860 if (!N0C || !N1C)
13861 return SDValue();
13862 int64_t C0 = N0C->getSExtValue();
13863 int64_t C1 = N1C->getSExtValue();
13864 if (C0 <= 0 || C1 <= 0)
13865 return SDValue();
13866
13867 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13868 int64_t Bits = std::min(C0, C1);
13869 int64_t Diff = std::abs(C0 - C1);
13870 if (Diff != 1 && Diff != 2 && Diff != 3)
13871 return SDValue();
13872
13873 // Build nodes.
13874 SDLoc DL(N);
13875 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13876 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13877 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13878 DAG.getConstant(Diff, DL, VT), NS);
13879 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13880}
13881
13882// Combine a constant select operand into its use:
13883//
13884// (and (select cond, -1, c), x)
13885// -> (select cond, x, (and x, c)) [AllOnes=1]
13886// (or (select cond, 0, c), x)
13887// -> (select cond, x, (or x, c)) [AllOnes=0]
13888// (xor (select cond, 0, c), x)
13889// -> (select cond, x, (xor x, c)) [AllOnes=0]
13890// (add (select cond, 0, c), x)
13891// -> (select cond, x, (add x, c)) [AllOnes=0]
13892// (sub x, (select cond, 0, c))
13893// -> (select cond, x, (sub x, c)) [AllOnes=0]
13894 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13895 SelectionDAG &DAG, bool AllOnes,
13896 const RISCVSubtarget &Subtarget) {
13897 EVT VT = N->getValueType(0);
13898
13899 // Skip vectors.
13900 if (VT.isVector())
13901 return SDValue();
13902
13903 if (!Subtarget.hasConditionalMoveFusion()) {
13904 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13905 if ((!Subtarget.hasStdExtZicond() &&
13906 !Subtarget.hasVendorXVentanaCondOps()) ||
13907 N->getOpcode() != ISD::AND)
13908 return SDValue();
13909
13910 // Maybe harmful when the condition code has multiple uses.
13911 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13912 return SDValue();
13913
13914 // Maybe harmful when VT is wider than XLen.
13915 if (VT.getSizeInBits() > Subtarget.getXLen())
13916 return SDValue();
13917 }
13918
13919 if ((Slct.getOpcode() != ISD::SELECT &&
13920 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13921 !Slct.hasOneUse())
13922 return SDValue();
13923
13924 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13925 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13926 };
13927
13928 bool SwapSelectOps;
13929 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13930 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13931 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13932 SDValue NonConstantVal;
13933 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13934 SwapSelectOps = false;
13935 NonConstantVal = FalseVal;
13936 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13937 SwapSelectOps = true;
13938 NonConstantVal = TrueVal;
13939 } else
13940 return SDValue();
13941
13942 // Slct is now known to be the desired identity constant when CC is true.
13943 TrueVal = OtherOp;
13944 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13945 // Unless SwapSelectOps says the condition should be false.
13946 if (SwapSelectOps)
13947 std::swap(TrueVal, FalseVal);
13948
13949 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13950 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13951 {Slct.getOperand(0), Slct.getOperand(1),
13952 Slct.getOperand(2), TrueVal, FalseVal});
13953
13954 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13955 {Slct.getOperand(0), TrueVal, FalseVal});
13956}
13957
13958// Attempt combineSelectAndUse on each operand of a commutative operator N.
13959 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13960 bool AllOnes,
13961 const RISCVSubtarget &Subtarget) {
13962 SDValue N0 = N->getOperand(0);
13963 SDValue N1 = N->getOperand(1);
13964 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13965 return Result;
13966 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13967 return Result;
13968 return SDValue();
13969}
13970
13971// Transform (add (mul x, c0), c1) ->
13972// (add (mul (add x, c1/c0), c0), c1%c0).
13973// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13974// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13975// to an infinite loop in DAGCombine if transformed.
13976// Or transform (add (mul x, c0), c1) ->
13977// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13978// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13979// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13980// lead to an infinite loop in DAGCombine if transformed.
13981// Or transform (add (mul x, c0), c1) ->
13982// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13983// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13984// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13985// lead to an infinite loop in DAGCombine if transformed.
13986// Or transform (add (mul x, c0), c1) ->
13987// (mul (add x, c1/c0), c0).
13988// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
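// Worked example for the first form: (add (mul x, 100), 4099). Here c1 = 4099
// is not simm12, but c1/c0 = 40 and c1%c0 = 99 both are, and c0*(c1/c0) = 4000
// is not simm12, so the expression becomes (add (mul (add x, 40), 100), 99),
// which is the same value: 100*(x + 40) + 99 == 100*x + 4099.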
13989 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13990 const RISCVSubtarget &Subtarget) {
13991 // Skip for vector types and larger types.
13992 EVT VT = N->getValueType(0);
13993 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13994 return SDValue();
13995 // The first operand node must be a MUL and have no other use.
13996 SDValue N0 = N->getOperand(0);
13997 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13998 return SDValue();
13999 // Check if c0 and c1 match above conditions.
14000 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14001 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14002 if (!N0C || !N1C)
14003 return SDValue();
14004 // If N0C has multiple uses it's possible one of the cases in
14005 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
14006 // in an infinite loop.
14007 if (!N0C->hasOneUse())
14008 return SDValue();
14009 int64_t C0 = N0C->getSExtValue();
14010 int64_t C1 = N1C->getSExtValue();
14011 int64_t CA, CB;
14012 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
14013 return SDValue();
14014 // Search for proper CA (non-zero) and CB that both are simm12.
14015 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
14016 !isInt<12>(C0 * (C1 / C0))) {
14017 CA = C1 / C0;
14018 CB = C1 % C0;
14019 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
14020 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
14021 CA = C1 / C0 + 1;
14022 CB = C1 % C0 - C0;
14023 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
14024 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
14025 CA = C1 / C0 - 1;
14026 CB = C1 % C0 + C0;
14027 } else
14028 return SDValue();
14029 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
14030 SDLoc DL(N);
14031 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
14032 DAG.getSignedConstant(CA, DL, VT));
14033 SDValue New1 =
14034 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
14035 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
14036}
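// A worked example of the first case above, with illustrative constants:
// c0 = 100 and c1 = 4099. c1 is not simm12 (4099 > 2047), but c1/c0 = 40 and
// c1%c0 = 99 both are, and c0*(c1/c0) = 4000 is still outside the simm12
// range, so the infinite-loop guard does not apply. The node becomes
//   (add (mul (add x, 40), 100), 99)
// which evaluates to 100*x + 4099, matching the original expression.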
14037
14038// add (zext, zext) -> zext (add (zext, zext))
14039// sub (zext, zext) -> sext (sub (zext, zext))
14040// mul (zext, zext) -> zext (mul (zext, zext))
14041// sdiv (zext, zext) -> zext (sdiv (zext, zext))
14042// udiv (zext, zext) -> zext (udiv (zext, zext))
14043// srem (zext, zext) -> zext (srem (zext, zext))
14044// urem (zext, zext) -> zext (urem (zext, zext))
14045//
14046// where the sum of the extend widths matches, and the range of the bin op
14047// fits inside the width of the narrower bin op. (For profitability on rvv, we
14048// use a power of two for both inner and outer extend.)
14049 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
14050
14051 EVT VT = N->getValueType(0);
14052 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
14053 return SDValue();
14054
14055 SDValue N0 = N->getOperand(0);
14056 SDValue N1 = N->getOperand(1);
14057 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
14058 return SDValue();
14059 if (!N0.hasOneUse() || !N1.hasOneUse())
14060 return SDValue();
14061
14062 SDValue Src0 = N0.getOperand(0);
14063 SDValue Src1 = N1.getOperand(0);
14064 EVT SrcVT = Src0.getValueType();
14065 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
14066 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
14067 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
14068 return SDValue();
14069
14070 LLVMContext &C = *DAG.getContext();
14071 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
14072 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
14073
14074 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
14075 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
14076
14077 // Src0 and Src1 are zero extended, so they're always positive if signed.
14078 //
14079 // sub can produce a negative from two positive operands, so it needs to be
14080 // sign extended. Other nodes produce a positive from two positive operands, so
14081 // zero extend instead.
14082 unsigned OuterExtend =
14083 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14084
14085 return DAG.getNode(
14086 OuterExtend, SDLoc(N), VT,
14087 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
14088}
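// A concrete instance of the rewrite above, with illustrative types:
//   add (zext v4i8 A to v4i32), (zext v4i8 B to v4i32)
// becomes
//   zext (add (zext v4i8 A to v4i16), (zext v4i8 B to v4i16)) to v4i32
// The i8 sources pass the width check (8 < 32 / 2), the sum of two zero
// extended i8 values always fits in i16, and both the inner and outer extends
// keep the power-of-two ratios mentioned in the comment.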
14089
14090// Try to turn (add (xor bool, 1), -1) into (neg bool).
14091 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
14092 SDValue N0 = N->getOperand(0);
14093 SDValue N1 = N->getOperand(1);
14094 EVT VT = N->getValueType(0);
14095 SDLoc DL(N);
14096
14097 // RHS should be -1.
14098 if (!isAllOnesConstant(N1))
14099 return SDValue();
14100
14101 // Look for (xor X, 1).
14102 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
14103 return SDValue();
14104
14105 // First xor input should be 0 or 1.
14106 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14107 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
14108 return SDValue();
14109
14110 // Emit a negate of the setcc.
14111 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
14112 N0.getOperand(0));
14113}
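// The identity behind this combine, for b known to be 0 or 1:
//   (xor b, 1) == 1 - b, so (add (xor b, 1), -1) == (1 - b) - 1 == -b,
// i.e. (sub 0, b). For b == 1 both sides are -1; for b == 0 both are 0.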
14114
14115 static SDValue performADDCombine(SDNode *N,
14116 TargetLowering::DAGCombinerInfo &DCI,
14117 const RISCVSubtarget &Subtarget) {
14118 SelectionDAG &DAG = DCI.DAG;
14119 if (SDValue V = combineAddOfBooleanXor(N, DAG))
14120 return V;
14121 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
14122 return V;
14123 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14124 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
14125 return V;
14126 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14127 return V;
14128 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14129 return V;
14130 if (SDValue V = combineBinOpOfZExt(N, DAG))
14131 return V;
14132
14133 // fold (add (select lhs, rhs, cc, 0, y), x) ->
14134 // (select lhs, rhs, cc, x, (add x, y))
14135 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14136}
14137
14138// Try to turn a sub boolean RHS and constant LHS into an addi.
14139 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
14140 SDValue N0 = N->getOperand(0);
14141 SDValue N1 = N->getOperand(1);
14142 EVT VT = N->getValueType(0);
14143 SDLoc DL(N);
14144
14145 // Require a constant LHS.
14146 auto *N0C = dyn_cast<ConstantSDNode>(N0);
14147 if (!N0C)
14148 return SDValue();
14149
14150 // All our optimizations involve subtracting 1 from the immediate and forming
14151 // an ADDI. Make sure the new immediate is valid for an ADDI.
14152 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
14153 if (!ImmValMinus1.isSignedIntN(12))
14154 return SDValue();
14155
14156 SDValue NewLHS;
14157 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
14158 // (sub constant, (setcc x, y, eq/neq)) ->
14159 // (add (setcc x, y, neq/eq), constant - 1)
14160 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14161 EVT SetCCOpVT = N1.getOperand(0).getValueType();
14162 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
14163 return SDValue();
14164 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
14165 NewLHS =
14166 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
14167 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
14168 N1.getOperand(0).getOpcode() == ISD::SETCC) {
14169 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
14170 // Since setcc returns a bool the xor is equivalent to 1-setcc.
14171 NewLHS = N1.getOperand(0);
14172 } else
14173 return SDValue();
14174
14175 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
14176 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
14177}
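// Worked example with illustrative values: (sub 5, (setcc x, y, eq)) becomes
// (add (setcc x, y, ne), 4). If x == y the original is 5 - 1 == 4 and the
// replacement is 0 + 4 == 4; otherwise both evaluate to 5. The new constant 4
// fits in simm12, so the add can be selected as an ADDI.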
14178
14179// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
14180// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
14181// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
14182// valid with Y=3, while 0b0000_1000_0000_0100 is not.
14183 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
14184 const RISCVSubtarget &Subtarget) {
14185 if (!Subtarget.hasStdExtZbb())
14186 return SDValue();
14187
14188 EVT VT = N->getValueType(0);
14189
14190 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
14191 return SDValue();
14192
14193 SDValue N0 = N->getOperand(0);
14194 SDValue N1 = N->getOperand(1);
14195
14196 if (N0->getOpcode() != ISD::SHL)
14197 return SDValue();
14198
14199 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
14200 if (!ShAmtCLeft)
14201 return SDValue();
14202 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
14203
14204 if (ShiftedAmount >= 8)
14205 return SDValue();
14206
14207 SDValue LeftShiftOperand = N0->getOperand(0);
14208 SDValue RightShiftOperand = N1;
14209
14210 if (ShiftedAmount != 0) { // Right operand must be a right shift.
14211 if (N1->getOpcode() != ISD::SRL)
14212 return SDValue();
14213 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14214 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
14215 return SDValue();
14216 RightShiftOperand = N1.getOperand(0);
14217 }
14218
14219 // At least one shift should have a single use.
14220 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
14221 return SDValue();
14222
14223 if (LeftShiftOperand != RightShiftOperand)
14224 return SDValue();
14225
14226 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
14227 Mask <<= ShiftedAmount;
14228 // Check that X has indeed the right shape (only the Y-th bit can be set in
14229 // every byte).
14230 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
14231 return SDValue();
14232
14233 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
14234}
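// A worked example with Y = 3, looking at a single byte: if only bit 3 of
// each byte of X can be set, then for the byte value 0x08 we get
//   (0x08 << 5) - (0x08 >> 3) == 0x100 - 0x1 == 0xFF,
// and for a zero byte both shifts produce 0. That matches orc.b, which ORs
// the bits of each byte into every bit of that byte: nonzero bytes become
// 0xFF and zero bytes stay 0x00.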
14235
14236 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
14237 const RISCVSubtarget &Subtarget) {
14238 if (SDValue V = combineSubOfBoolean(N, DAG))
14239 return V;
14240
14241 EVT VT = N->getValueType(0);
14242 SDValue N0 = N->getOperand(0);
14243 SDValue N1 = N->getOperand(1);
14244 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
14245 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
14246 isNullConstant(N1.getOperand(1))) {
14247 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14248 if (CCVal == ISD::SETLT) {
14249 SDLoc DL(N);
14250 unsigned ShAmt = N0.getValueSizeInBits() - 1;
14251 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
14252 DAG.getConstant(ShAmt, DL, VT));
14253 }
14254 }
14255
14256 if (SDValue V = combineBinOpOfZExt(N, DAG))
14257 return V;
14258 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
14259 return V;
14260
14261 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
14262 // (select lhs, rhs, cc, x, (sub x, y))
14263 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
14264}
14265
14266// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
14267// Legalizing setcc can introduce xors like this. Doing this transform reduces
14268// the number of xors and may allow the xor to fold into a branch condition.
14269 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
14270 SDValue N0 = N->getOperand(0);
14271 SDValue N1 = N->getOperand(1);
14272 bool IsAnd = N->getOpcode() == ISD::AND;
14273
14274 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
14275 return SDValue();
14276
14277 if (!N0.hasOneUse() || !N1.hasOneUse())
14278 return SDValue();
14279
14280 SDValue N01 = N0.getOperand(1);
14281 SDValue N11 = N1.getOperand(1);
14282
14283 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
14284 // (xor X, -1) based on the upper bits of the other operand being 0. If the
14285 // operation is And, allow one of the Xors to use -1.
14286 if (isOneConstant(N01)) {
14287 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
14288 return SDValue();
14289 } else if (isOneConstant(N11)) {
14290 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
14291 if (!(IsAnd && isAllOnesConstant(N01)))
14292 return SDValue();
14293 } else
14294 return SDValue();
14295
14296 EVT VT = N->getValueType(0);
14297
14298 SDValue N00 = N0.getOperand(0);
14299 SDValue N10 = N1.getOperand(0);
14300
14301 // The LHS of the xors needs to be 0/1.
14302 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14303 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
14304 return SDValue();
14305
14306 // Invert the opcode and insert a new xor.
14307 SDLoc DL(N);
14308 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
14309 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
14310 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
14311}
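// This is De Morgan's law on 0/1 values: for x, y in {0, 1},
//   (and (xor x, 1), (xor y, 1)) == (xor (or x, y), 1)
//   (or (xor x, 1), (xor y, 1)) == (xor (and x, y), 1)
// so the two xors on the inputs collapse into a single xor on the result of
// the inverted logic operation.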
14312
14313// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
14314// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
14315// value to an unsigned value. This will be lowered to vmax and a series of
14316// vnclipu instructions later. This can be extended to truncated types other
14317// than i8 by replacing 256 and 255 with the equivalent constants for the
14318// type.
14319 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
14320 EVT VT = N->getValueType(0);
14321 SDValue N0 = N->getOperand(0);
14322 EVT SrcVT = N0.getValueType();
14323
14324 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14325 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
14326 return SDValue();
14327
14328 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
14329 return SDValue();
14330
14331 SDValue Cond = N0.getOperand(0);
14332 SDValue True = N0.getOperand(1);
14333 SDValue False = N0.getOperand(2);
14334
14335 if (Cond.getOpcode() != ISD::SETCC)
14336 return SDValue();
14337
14338 // FIXME: Support the version of this pattern with the select operands
14339 // swapped.
14340 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14341 if (CCVal != ISD::SETULT)
14342 return SDValue();
14343
14344 SDValue CondLHS = Cond.getOperand(0);
14345 SDValue CondRHS = Cond.getOperand(1);
14346
14347 if (CondLHS != True)
14348 return SDValue();
14349
14350 unsigned ScalarBits = VT.getScalarSizeInBits();
14351
14352 // FIXME: Support other constants.
14353 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
14354 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
14355 return SDValue();
14356
14357 if (False.getOpcode() != ISD::SIGN_EXTEND)
14358 return SDValue();
14359
14360 False = False.getOperand(0);
14361
14362 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
14363 return SDValue();
14364
14365 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
14366 if (!FalseRHSC || !FalseRHSC->isZero())
14367 return SDValue();
14368
14369 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
14370 if (CCVal2 != ISD::SETGT)
14371 return SDValue();
14372
14373 // Emit the signed to unsigned saturation pattern.
14374 SDLoc DL(N);
14375 SDValue Max =
14376 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
14377 SDValue Min =
14378 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
14379 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
14380 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
14381}
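// Behaviour check for the i16 -> i8 case: the original select yields X when
// 0 <= X <= 255, all-ones (255 after truncation) when X >= 256, and 0 when
// X < 0 (the unsigned compare fails and sext(setgt X, 0) is 0). The
// replacement trunc(smin(smax(X, 0), 255)) produces the same three results,
// which is the usual signed-to-unsigned saturation idiom.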
14382
14383 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
14384 const RISCVSubtarget &Subtarget) {
14385 SDValue N0 = N->getOperand(0);
14386 EVT VT = N->getValueType(0);
14387
14388 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
14389 // extending X. This is safe since we only need the LSB after the shift and
14390 // shift amounts larger than 31 would produce poison. If we wait until
14391 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14392 // to use a BEXT instruction.
14393 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14394 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
14395 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14396 SDLoc DL(N0);
14397 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14398 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14399 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14400 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
14401 }
14402
14403 return combineTruncSelectToSMaxUSat(N, DAG);
14404}
14405
14406// Combines two comparison operations and a logic operation into one selection
14407// operation (min, max) and a logic operation. Returns the newly constructed
14408// node if the conditions for the optimization are satisfied.
14409 static SDValue performANDCombine(SDNode *N,
14410 TargetLowering::DAGCombinerInfo &DCI,
14411 const RISCVSubtarget &Subtarget) {
14412 SelectionDAG &DAG = DCI.DAG;
14413
14414 SDValue N0 = N->getOperand(0);
14415 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
14416 // extending X. This is safe since we only need the LSB after the shift and
14417 // shift amounts larger than 31 would produce poison. If we wait until
14418 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14419 // to use a BEXT instruction.
14420 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14421 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
14422 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
14423 N0.hasOneUse()) {
14424 SDLoc DL(N);
14425 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14426 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14427 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14428 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
14429 DAG.getConstant(1, DL, MVT::i64));
14430 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14431 }
14432
14433 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14434 return V;
14435 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14436 return V;
14437
14438 if (DCI.isAfterLegalizeDAG())
14439 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14440 return V;
14441
14442 // fold (and (select lhs, rhs, cc, -1, y), x) ->
14443 // (select lhs, rhs, cc, x, (and x, y))
14444 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
14445}
14446
14447// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
14448// FIXME: Generalize to other binary operators with same operand.
14449 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
14450 SelectionDAG &DAG) {
14451 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
14452
14453 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
14454 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
14455 !N0.hasOneUse() || !N1.hasOneUse())
14456 return SDValue();
14457
14458 // Should have the same condition.
14459 SDValue Cond = N0.getOperand(1);
14460 if (Cond != N1.getOperand(1))
14461 return SDValue();
14462
14463 SDValue TrueV = N0.getOperand(0);
14464 SDValue FalseV = N1.getOperand(0);
14465
14466 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
14467 TrueV.getOperand(1) != FalseV.getOperand(1) ||
14468 !isOneConstant(TrueV.getOperand(1)) ||
14469 !TrueV.hasOneUse() || !FalseV.hasOneUse())
14470 return SDValue();
14471
14472 EVT VT = N->getValueType(0);
14473 SDLoc DL(N);
14474
14475 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
14476 Cond);
14477 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
14478 Cond);
14479 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
14480 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
14481}
14482
14483 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
14484 const RISCVSubtarget &Subtarget) {
14485 SelectionDAG &DAG = DCI.DAG;
14486
14487 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14488 return V;
14489 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14490 return V;
14491
14492 if (DCI.isAfterLegalizeDAG())
14493 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14494 return V;
14495
14496 // Look for an OR of CZERO_EQZ/NEZ with the same condition; this is the select
14497 // idiom. We may be able to pull a common operation out of the true and false values.
14498 SDValue N0 = N->getOperand(0);
14499 SDValue N1 = N->getOperand(1);
14500 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
14501 return V;
14502 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
14503 return V;
14504
14505 // fold (or (select cond, 0, y), x) ->
14506 // (select cond, x, (or x, y))
14507 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14508}
14509
14510 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
14511 const RISCVSubtarget &Subtarget) {
14512 SDValue N0 = N->getOperand(0);
14513 SDValue N1 = N->getOperand(1);
14514
14515 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
14516 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
14517 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
14518 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14519 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
14520 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
14521 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14522 SDLoc DL(N);
14523 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14524 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14525 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
14526 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
14527 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14528 }
14529
14530 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
14531 // NOTE: Assumes ROL being legal means ROLW is legal.
14532 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14533 if (N0.getOpcode() == RISCVISD::SLLW &&
14534 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
14535 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
14536 SDLoc DL(N);
14537 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
14538 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
14539 }
14540
14541 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
14542 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
14543 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
14544 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14545 if (ConstN00 && CC == ISD::SETLT) {
14546 EVT VT = N0.getValueType();
14547 SDLoc DL(N0);
14548 const APInt &Imm = ConstN00->getAPIntValue();
14549 if ((Imm + 1).isSignedIntN(12))
14550 return DAG.getSetCC(DL, VT, N0.getOperand(1),
14551 DAG.getConstant(Imm + 1, DL, VT), CC);
14552 }
14553 }
14554
14555 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14556 return V;
14557 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14558 return V;
14559
14560 // fold (xor (select cond, 0, y), x) ->
14561 // (select cond, x, (xor x, y))
14562 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14563}
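// The setcc fold in performXORCombine relies on the identity
//   !(C < y) == (y <= C) == (y < C + 1)
// for a constant C, as long as C + 1 still passes the simm12 check. E.g.
// (xor (setcc 7, y, setlt), 1) becomes (setcc y, 8, setlt).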
14564
14565// Try to expand a scalar multiply to a faster sequence.
14566 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
14567 TargetLowering::DAGCombinerInfo &DCI,
14568 const RISCVSubtarget &Subtarget) {
14569
14570 EVT VT = N->getValueType(0);
14571
14572 // LI + MUL is usually smaller than the alternative sequence.
14573 if (DAG.getMachineFunction().getFunction().hasMinSize())
14574 return SDValue();
14575
14576 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14577 return SDValue();
14578
14579 if (VT != Subtarget.getXLenVT())
14580 return SDValue();
14581
14582 const bool HasShlAdd =
14583 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14584
14585 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
14586 if (!CNode)
14587 return SDValue();
14588 uint64_t MulAmt = CNode->getZExtValue();
14589
14590 // WARNING: The code below is knowingly incorrect with regards to undef semantics.
14591 // We're adding additional uses of X here, and in principle, we should be freezing
14592 // X before doing so. However, adding freeze here causes real regressions, and no
14593 // other target properly freezes X in these cases either.
14594 SDValue X = N->getOperand(0);
14595
14596 if (HasShlAdd) {
14597 for (uint64_t Divisor : {3, 5, 9}) {
14598 if (MulAmt % Divisor != 0)
14599 continue;
14600 uint64_t MulAmt2 = MulAmt / Divisor;
14601 // 3/5/9 * 2^N -> shl (shXadd X, X), N
14602 if (isPowerOf2_64(MulAmt2)) {
14603 SDLoc DL(N);
14604 SDValue X = N->getOperand(0);
14605 // Put the shift first if we can fold a zext into the
14606 // shift forming a slli.uw.
14607 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
14608 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
14609 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
14610 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14611 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
14612 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
14613 Shl);
14614 }
14615 // Otherwise, put the shl second so that it can fold with the following
14616 // instructions (e.g. sext or add).
14617 SDValue Mul359 =
14618 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14619 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14620 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
14621 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14622 }
14623
14624 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
14625 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
14626 SDLoc DL(N);
14627 SDValue Mul359 =
14628 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14629 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14630 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14631 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
14632 Mul359);
14633 }
14634 }
14635
14636 // If this is a power of 2 plus 2/4/8, we can use a shift followed by a single
14637 // shXadd. First check if this is a sum of two powers of 2 because that's
14638 // easy. Then count the trailing zeros to find the smaller power of 2.
14639 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
14640 unsigned ScaleShift = llvm::countr_zero(MulAmt);
14641 if (ScaleShift >= 1 && ScaleShift < 4) {
14642 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
14643 SDLoc DL(N);
14644 SDValue Shift1 =
14645 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14646 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14647 DAG.getConstant(ScaleShift, DL, VT), Shift1);
14648 }
14649 }
14650
14651 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
14652 // This is the two-instruction form; there are also three-instruction
14653 // variants we could implement, e.g.:
14654 // (2^(1,2,3) * 3,5,9 + 1) << C2
14655 // 2^(C1>3) * 3,5,9 +/- 1
14656 for (uint64_t Divisor : {3, 5, 9}) {
14657 uint64_t C = MulAmt - 1;
14658 if (C <= Divisor)
14659 continue;
14660 unsigned TZ = llvm::countr_zero(C);
14661 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14662 SDLoc DL(N);
14663 SDValue Mul359 =
14664 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14665 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14666 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14667 DAG.getConstant(TZ, DL, VT), X);
14668 }
14669 }
14670
14671 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
14672 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14673 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
14674 if (ScaleShift >= 1 && ScaleShift < 4) {
14675 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14676 SDLoc DL(N);
14677 SDValue Shift1 =
14678 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14679 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
14680 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14681 DAG.getConstant(ScaleShift, DL, VT), X));
14682 }
14683 }
14684
14685 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, x))
14686 for (uint64_t Offset : {3, 5, 9}) {
14687 if (isPowerOf2_64(MulAmt + Offset)) {
14688 SDLoc DL(N);
14689 SDValue Shift1 =
14690 DAG.getNode(ISD::SHL, DL, VT, X,
14691 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
14692 SDValue Mul359 =
14693 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14694 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
14695 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
14696 }
14697 }
14698 }
14699
14700 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
14701 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14702 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
14703 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14704 SDLoc DL(N);
14705 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14706 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
14707 SDValue Shift2 =
14708 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14709 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
14710 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
14711 }
14712
14713 if (HasShlAdd) {
14714 for (uint64_t Divisor : {3, 5, 9}) {
14715 if (MulAmt % Divisor != 0)
14716 continue;
14717 uint64_t MulAmt2 = MulAmt / Divisor;
14718 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
14719 // of 25 which happen to be quite common.
14720 for (uint64_t Divisor2 : {3, 5, 9}) {
14721 if (MulAmt2 % Divisor2 != 0)
14722 continue;
14723 uint64_t MulAmt3 = MulAmt2 / Divisor2;
14724 if (isPowerOf2_64(MulAmt3)) {
14725 SDLoc DL(N);
14726 SDValue Mul359A =
14727 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14728 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14729 SDValue Mul359B = DAG.getNode(
14730 RISCVISD::SHL_ADD, DL, VT, Mul359A,
14731 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
14732 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
14733 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
14734 }
14735 }
14736 }
14737 }
14738
14739 return SDValue();
14740}
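// Two illustrative decompositions produced above when shXadd is available
// (Zba or XTheadBa):
//   X * 20: 20 == 5 * 4, so emit (shl (sh2add X, X), 2), i.e. (X*5) << 2.
//   X * 25: 25 == 5 * 5, so emit (sh2add (sh2add X, X), (sh2add X, X)),
//           i.e. T = X*5 followed by T*5.
// Both replace an LI + MUL pair with two cheap shift/shift-add instructions.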
14741
14742// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
14743// (bitcast (sra (v2Xi16 (bitcast X)), 15))
14744// Same for other equivalent types with other equivalent constants.
14745 static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
14746 EVT VT = N->getValueType(0);
14747 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14748
14749 // Do this for legal vectors unless they are i1 or i8 vectors.
14750 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
14751 return SDValue();
14752
14753 if (N->getOperand(0).getOpcode() != ISD::AND ||
14754 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
14755 return SDValue();
14756
14757 SDValue And = N->getOperand(0);
14758 SDValue Srl = And.getOperand(0);
14759
14760 APInt V1, V2, V3;
14761 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
14762 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
14763 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
14764 return SDValue();
14765
14766 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
14767 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14768 V3 != (HalfSize - 1))
14769 return SDValue();
14770
14771 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
14772 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
14773 VT.getVectorElementCount() * 2);
14774 SDLoc DL(N);
14775 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
14776 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
14777 DAG.getConstant(HalfSize - 1, DL, HalfVT));
14778 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
14779}
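// Worked through for i32 elements (HalfSize == 16): (lshr X, 15) & 0x10001
// leaves the sign bit of each i16 half in bit 0 of that half, and multiplying
// by 0xffff spreads each of those bits across its own half (bit 0 * 0xffff ==
// 0x0000ffff, bit 16 * 0xffff == 0xffff0000, with no carries between them).
// That is the same as sign-extending the sign bit of each i16 lane, i.e.
// (sra (v2Xi16 bitcast), 15).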
14780
14781 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
14782 TargetLowering::DAGCombinerInfo &DCI,
14783 const RISCVSubtarget &Subtarget) {
14784 EVT VT = N->getValueType(0);
14785 if (!VT.isVector())
14786 return expandMul(N, DAG, DCI, Subtarget);
14787
14788 SDLoc DL(N);
14789 SDValue N0 = N->getOperand(0);
14790 SDValue N1 = N->getOperand(1);
14791 SDValue MulOper;
14792 unsigned AddSubOpc;
14793
14794 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
14795 // (mul x, add (y, 1)) -> (add x, (mul x, y))
14796 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
14797 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
14798 auto IsAddSubWith1 = [&](SDValue V) -> bool {
14799 AddSubOpc = V->getOpcode();
14800 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
14801 SDValue Opnd = V->getOperand(1);
14802 MulOper = V->getOperand(0);
14803 if (AddSubOpc == ISD::SUB)
14804 std::swap(Opnd, MulOper);
14805 if (isOneOrOneSplat(Opnd))
14806 return true;
14807 }
14808 return false;
14809 };
14810
14811 if (IsAddSubWith1(N0)) {
14812 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
14813 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
14814 }
14815
14816 if (IsAddSubWith1(N1)) {
14817 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
14818 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
14819 }
14820
14821 if (SDValue V = combineBinOpOfZExt(N, DAG))
14822 return V;
14823
14824 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
14825 return V;
14826
14827 return SDValue();
14828}
14829
14830/// According to the property that indexed load/store instructions zero-extend
14831/// their indices, try to narrow the type of index operand.
14832static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
14833 if (isIndexTypeSigned(IndexType))
14834 return false;
14835
14836 if (!N->hasOneUse())
14837 return false;
14838
14839 EVT VT = N.getValueType();
14840 SDLoc DL(N);
14841
14842 // In general, what we're doing here is seeing if we can sink a truncate to
14843 // a smaller element type into the expression tree building our index.
14844 // TODO: We can generalize this and handle a bunch more cases if useful.
14845
14846 // Narrow a buildvector to the narrowest element type. This requires less
14847 // work and less register pressure at high LMUL, and creates smaller constants
14848 // which may be cheaper to materialize.
14849 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14850 KnownBits Known = DAG.computeKnownBits(N);
14851 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14852 LLVMContext &C = *DAG.getContext();
14853 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14854 if (ResultVT.bitsLT(VT.getVectorElementType())) {
14855 N = DAG.getNode(ISD::TRUNCATE, DL,
14856 VT.changeVectorElementType(ResultVT), N);
14857 return true;
14858 }
14859 }
14860
14861 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
14862 if (N.getOpcode() != ISD::SHL)
14863 return false;
14864
14865 SDValue N0 = N.getOperand(0);
14866 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14868 return false;
14869 if (!N0->hasOneUse())
14870 return false;
14871
14872 APInt ShAmt;
14873 SDValue N1 = N.getOperand(1);
14874 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14875 return false;
14876
14877 SDValue Src = N0.getOperand(0);
14878 EVT SrcVT = Src.getValueType();
14879 unsigned SrcElen = SrcVT.getScalarSizeInBits();
14880 unsigned ShAmtV = ShAmt.getZExtValue();
14881 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14882 NewElen = std::max(NewElen, 8U);
14883
14884 // Skip if NewElen is not narrower than the original extended type.
14885 if (NewElen >= N0.getValueType().getScalarSizeInBits())
14886 return false;
14887
14888 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14889 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14890
14891 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14892 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14893 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14894 return true;
14895}
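// Example of the shl-of-zext case above: an index of the form
//   shl (zext vXi8 A to vXi64), splat 2
// needs at most 8 + 2 == 10 significant bits, which rounds up to 16, so it is
// rebuilt as shl (zext vXi8 A to vXi16), splat 2. The indexed memory
// operation zero extends its indices anyway, so the narrower index type is
// equivalent and typically cheaper at high LMUL.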
14896
14897// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14898// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14899// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14900// can become a sext.w instead of a shift pair.
14901 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
14902 const RISCVSubtarget &Subtarget) {
14903 SDValue N0 = N->getOperand(0);
14904 SDValue N1 = N->getOperand(1);
14905 EVT VT = N->getValueType(0);
14906 EVT OpVT = N0.getValueType();
14907
14908 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14909 return SDValue();
14910
14911 // RHS needs to be a constant.
14912 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14913 if (!N1C)
14914 return SDValue();
14915
14916 // LHS needs to be (and X, 0xffffffff).
14917 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14918 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14919 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14920 return SDValue();
14921
14922 // Looking for an equality compare.
14923 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14924 if (!isIntEqualitySetCC(Cond))
14925 return SDValue();
14926
14927 // Don't do this if the sign bit is provably zero, it will be turned back into
14928 // an AND.
14929 APInt SignMask = APInt::getOneBitSet(64, 31);
14930 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14931 return SDValue();
14932
14933 const APInt &C1 = N1C->getAPIntValue();
14934
14935 SDLoc dl(N);
14936 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14937 // to be equal.
14938 if (C1.getActiveBits() > 32)
14939 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14940
14941 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14942 N0.getOperand(0), DAG.getValueType(MVT::i32));
14943 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14944 dl, OpVT), Cond);
14945}
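// Illustrative constants for the fold above: (seteq (and X, 0xffffffff),
// 0xfffff000) becomes (seteq (sext_inreg X, i32), 0xfffffffffffff000),
// i.e. the RHS is compared against -4096, which a single LUI can produce,
// and the sext_inreg selects to sext.w as described above.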
14946
14947static SDValue
14948 performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
14949 const RISCVSubtarget &Subtarget) {
14950 SDValue Src = N->getOperand(0);
14951 EVT VT = N->getValueType(0);
14952 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14953 unsigned Opc = Src.getOpcode();
14954
14955 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14956 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
14957 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
14958 Subtarget.hasStdExtZfhmin())
14959 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14960 Src.getOperand(0));
14961
14962 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
14963 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
14964 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14965 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
14966 return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
14967 Src.getOperand(1));
14968
14969 return SDValue();
14970}
14971
14972namespace {
14973// Forward declaration of the structure holding the necessary information to
14974// apply a combine.
14975struct CombineResult;
14976
14977enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14978/// Helper class for folding sign/zero extensions.
14979/// In particular, this class is used for the following combines:
14980/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14981/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14982/// mul | mul_vl -> vwmul(u) | vwmul_su
14983/// shl | shl_vl -> vwsll
14984/// fadd -> vfwadd | vfwadd_w
14985/// fsub -> vfwsub | vfwsub_w
14986/// fmul -> vfwmul
14987/// An object of this class represents an operand of the operation we want to
14988/// combine.
14989/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14990/// NodeExtensionHelper for `a` and one for `b`.
14991///
14992/// This class abstracts away how the extension is materialized and
14993/// how its number of users affect the combines.
14994///
14995/// In particular:
14996/// - VWADD_W is conceptually == add(op0, sext(op1))
14997/// - VWADDU_W == add(op0, zext(op1))
14998/// - VWSUB_W == sub(op0, sext(op1))
14999/// - VWSUBU_W == sub(op0, zext(op1))
15000/// - VFWADD_W == fadd(op0, fpext(op1))
15001/// - VFWSUB_W == fsub(op0, fpext(op1))
15002/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
15003/// zext|sext(smaller_value).
15004struct NodeExtensionHelper {
15005 /// Records if this operand is like being zero extended.
15006 bool SupportsZExt;
15007 /// Records if this operand is like being sign extended.
15008 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
15009 /// instance, a splat constant (e.g., 3), would support being both sign and
15010 /// zero extended.
15011 bool SupportsSExt;
15012 /// Records if this operand is like being floating-point extended.
15013 bool SupportsFPExt;
15014 /// This boolean captures whether we care if this operand would still be
15015 /// around after the folding happens.
15016 bool EnforceOneUse;
15017 /// Original value that this NodeExtensionHelper represents.
15018 SDValue OrigOperand;
15019
15020 /// Get the value feeding the extension or the value itself.
15021 /// E.g., for zext(a), this would return a.
15022 SDValue getSource() const {
15023 switch (OrigOperand.getOpcode()) {
15024 case ISD::ZERO_EXTEND:
15025 case ISD::SIGN_EXTEND:
15026 case RISCVISD::VSEXT_VL:
15027 case RISCVISD::VZEXT_VL:
15028 case RISCVISD::FP_EXTEND_VL:
15029 return OrigOperand.getOperand(0);
15030 default:
15031 return OrigOperand;
15032 }
15033 }
15034
15035 /// Check if this instance represents a splat.
15036 bool isSplat() const {
15037 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
15038 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
15039 }
15040
15041 /// Get the extended opcode.
15042 unsigned getExtOpc(ExtKind SupportsExt) const {
15043 switch (SupportsExt) {
15044 case ExtKind::SExt:
15045 return RISCVISD::VSEXT_VL;
15046 case ExtKind::ZExt:
15047 return RISCVISD::VZEXT_VL;
15048 case ExtKind::FPExt:
15049 return RISCVISD::FP_EXTEND_VL;
15050 }
15051 llvm_unreachable("Unknown ExtKind enum");
15052 }
15053
15054 /// Get or create a value that can feed \p Root with the given extension \p
15055 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of
15056 /// this operand. \see ::getSource().
15057 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
15058 const RISCVSubtarget &Subtarget,
15059 std::optional<ExtKind> SupportsExt) const {
15060 if (!SupportsExt.has_value())
15061 return OrigOperand;
15062
15063 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
15064
15065 SDValue Source = getSource();
15066 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
15067 if (Source.getValueType() == NarrowVT)
15068 return Source;
15069
15070 // vfmadd_vl -> vfwmadd_vl can take bf16 operands
15071 if (Source.getValueType().getVectorElementType() == MVT::bf16) {
15072 assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
15073 Root->getOpcode() == RISCVISD::VFMADD_VL);
15074 return Source;
15075 }
15076
15077 unsigned ExtOpc = getExtOpc(*SupportsExt);
15078
15079 // If we need an extension, we should be changing the type.
15080 SDLoc DL(OrigOperand);
15081 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
15082 switch (OrigOperand.getOpcode()) {
15083 case ISD::ZERO_EXTEND:
15084 case ISD::SIGN_EXTEND:
15085 case RISCVISD::VSEXT_VL:
15086 case RISCVISD::VZEXT_VL:
15087 case RISCVISD::FP_EXTEND_VL:
15088 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
15089 case ISD::SPLAT_VECTOR:
15090 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
15091 case RISCVISD::VMV_V_X_VL:
15092 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
15093 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
15094 case RISCVISD::VFMV_V_F_VL:
15095 Source = Source.getOperand(1);
15096 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
15097 Source = Source.getOperand(0);
15098 assert(Source.getValueType() == NarrowVT.getVectorElementType());
15099 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
15100 DAG.getUNDEF(NarrowVT), Source, VL);
15101 default:
15102 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
15103 // and that operand should already have the right NarrowVT so no
15104 // extension should be required at this point.
15105 llvm_unreachable("Unsupported opcode");
15106 }
15107 }
15108
15109 /// Helper function to get the narrow type for \p Root.
15110 /// The narrow type is the type of \p Root where we divided the size of each
15111 /// element by 2. E.g., if Root's type is <2 x i16>, the narrow type is <2 x i8>.
15112 /// \pre Both the narrow type and the original type should be legal.
15113 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
15114 MVT VT = Root->getSimpleValueType(0);
15115
15116 // Determine the narrow size.
15117 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15118
15119 MVT EltVT = SupportsExt == ExtKind::FPExt
15120 ? MVT::getFloatingPointVT(NarrowSize)
15121 : MVT::getIntegerVT(NarrowSize);
15122
15123 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
15124 "Trying to extend something we can't represent");
15125 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
15126 return NarrowVT;
15127 }
15128
15129 /// Get the opcode to materialize:
15130 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
15131 static unsigned getSExtOpcode(unsigned Opcode) {
15132 switch (Opcode) {
15133 case ISD::ADD:
15134 case RISCVISD::ADD_VL:
15135 case RISCVISD::VWADD_W_VL:
15136 case RISCVISD::VWADDU_W_VL:
15137 case ISD::OR:
15138 return RISCVISD::VWADD_VL;
15139 case ISD::SUB:
15140 case RISCVISD::SUB_VL:
15141 case RISCVISD::VWSUB_W_VL:
15142 case RISCVISD::VWSUBU_W_VL:
15143 return RISCVISD::VWSUB_VL;
15144 case ISD::MUL:
15145 case RISCVISD::MUL_VL:
15146 return RISCVISD::VWMUL_VL;
15147 default:
15148 llvm_unreachable("Unexpected opcode");
15149 }
15150 }
15151
15152 /// Get the opcode to materialize:
15153 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
15154 static unsigned getZExtOpcode(unsigned Opcode) {
15155 switch (Opcode) {
15156 case ISD::ADD:
15157 case RISCVISD::ADD_VL:
15158 case RISCVISD::VWADD_W_VL:
15159 case RISCVISD::VWADDU_W_VL:
15160 case ISD::OR:
15161 return RISCVISD::VWADDU_VL;
15162 case ISD::SUB:
15163 case RISCVISD::SUB_VL:
15164 case RISCVISD::VWSUB_W_VL:
15165 case RISCVISD::VWSUBU_W_VL:
15166 return RISCVISD::VWSUBU_VL;
15167 case ISD::MUL:
15168 case RISCVISD::MUL_VL:
15169 return RISCVISD::VWMULU_VL;
15170 case ISD::SHL:
15171 case RISCVISD::SHL_VL:
15172 return RISCVISD::VWSLL_VL;
15173 default:
15174 llvm_unreachable("Unexpected opcode");
15175 }
15176 }
15177
15178 /// Get the opcode to materialize:
15179 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
15180 static unsigned getFPExtOpcode(unsigned Opcode) {
15181 switch (Opcode) {
15182 case RISCVISD::FADD_VL:
15183 case RISCVISD::VFWADD_W_VL:
15184 return RISCVISD::VFWADD_VL;
15185 case RISCVISD::FSUB_VL:
15186 case RISCVISD::VFWSUB_W_VL:
15187 return RISCVISD::VFWSUB_VL;
15188 case RISCVISD::FMUL_VL:
15189 return RISCVISD::VFWMUL_VL;
15190 case RISCVISD::VFMADD_VL:
15191 return RISCVISD::VFWMADD_VL;
15192 case RISCVISD::VFMSUB_VL:
15193 return RISCVISD::VFWMSUB_VL;
15194 case RISCVISD::VFNMADD_VL:
15195 return RISCVISD::VFWNMADD_VL;
15196 case RISCVISD::VFNMSUB_VL:
15197 return RISCVISD::VFWNMSUB_VL;
15198 default:
15199 llvm_unreachable("Unexpected opcode");
15200 }
15201 }
15202
15203 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
15204 /// newOpcode(a, b).
15205 static unsigned getSUOpcode(unsigned Opcode) {
15206 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
15207 "SU is only supported for MUL");
15208 return RISCVISD::VWMULSU_VL;
15209 }
15210
15211 /// Get the opcode to materialize
15212 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
15213 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
15214 switch (Opcode) {
15215 case ISD::ADD:
15216 case RISCVISD::ADD_VL:
15217 case ISD::OR:
15218 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
15219 : RISCVISD::VWADDU_W_VL;
15220 case ISD::SUB:
15221 case RISCVISD::SUB_VL:
15222 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
15223 : RISCVISD::VWSUBU_W_VL;
15224 case RISCVISD::FADD_VL:
15225 return RISCVISD::VFWADD_W_VL;
15226 case RISCVISD::FSUB_VL:
15227 return RISCVISD::VFWSUB_W_VL;
15228 default:
15229 llvm_unreachable("Unexpected opcode");
15230 }
15231 }
15232
15233 using CombineToTry = std::function<std::optional<CombineResult>(
15234 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
15235 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
15236 const RISCVSubtarget &)>;
15237
15238 /// Check if this node needs to be fully folded or extended for all users.
15239 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
15240
15241 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
15242 const RISCVSubtarget &Subtarget) {
15243 unsigned Opc = OrigOperand.getOpcode();
15244 MVT VT = OrigOperand.getSimpleValueType();
15245
15246 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
15247 "Unexpected Opcode");
15248
15249 // The passthru must be undef for the operation to be tail agnostic.
15250 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
15251 return;
15252
15253 // Get the scalar value.
15254 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
15255 : OrigOperand.getOperand(1);
15256
15257 // See if we have enough sign bits or zero bits in the scalar to use a
15258 // widening opcode by splatting to smaller element size.
15259 unsigned EltBits = VT.getScalarSizeInBits();
15260 unsigned ScalarBits = Op.getValueSizeInBits();
15261 // If we're not getting all bits from the element, we need special handling.
15262 if (ScalarBits < EltBits) {
15263 // This should only occur on RV32.
15264 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
15265 !Subtarget.is64Bit() && "Unexpected splat");
15266 // vmv.v.x sign extends narrow inputs.
15267 SupportsSExt = true;
15268
15269 // If the input is positive, then sign extend is also zero extend.
15270 if (DAG.SignBitIsZero(Op))
15271 SupportsZExt = true;
15272
15273 EnforceOneUse = false;
15274 return;
15275 }
15276
15277 unsigned NarrowSize = EltBits / 2;
15278 // If the narrow type cannot be expressed with a legal VMV,
15279 // this is not a valid candidate.
15280 if (NarrowSize < 8)
15281 return;
15282
15283 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
15284 SupportsSExt = true;
15285
15286 if (DAG.MaskedValueIsZero(Op,
15287 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
15288 SupportsZExt = true;
15289
15290 EnforceOneUse = false;
15291 }
15292
15293 bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
15294 const RISCVSubtarget &Subtarget) {
15295 // Any f16 extension will need zvfh.
15296 if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
15297 return false;
15298 // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
15299 // zvfbfwma
15300 if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
15301 Root->getOpcode() != RISCVISD::VFMADD_VL))
15302 return false;
15303 return true;
15304 }
15305
15306 /// Helper method to set the various fields of this struct based on the
15307 /// type of \p Root.
15308 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
15309 const RISCVSubtarget &Subtarget) {
15310 SupportsZExt = false;
15311 SupportsSExt = false;
15312 SupportsFPExt = false;
15313 EnforceOneUse = true;
15314 unsigned Opc = OrigOperand.getOpcode();
15315 // For the nodes we handle below, we end up using their inputs directly: see
15316 // getSource(). However since they either don't have a passthru or we check
15317 // that their passthru is undef, we can safely ignore their mask and VL.
15318 switch (Opc) {
15319 case ISD::ZERO_EXTEND:
15320 case ISD::SIGN_EXTEND: {
15321 MVT VT = OrigOperand.getSimpleValueType();
15322 if (!VT.isVector())
15323 break;
15324
15325 SDValue NarrowElt = OrigOperand.getOperand(0);
15326 MVT NarrowVT = NarrowElt.getSimpleValueType();
15327 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
15328 if (NarrowVT.getVectorElementType() == MVT::i1)
15329 break;
15330
15331 SupportsZExt = Opc == ISD::ZERO_EXTEND;
15332 SupportsSExt = Opc == ISD::SIGN_EXTEND;
15333 break;
15334 }
15335 case RISCVISD::VZEXT_VL:
15336 SupportsZExt = true;
15337 break;
15338 case RISCVISD::VSEXT_VL:
15339 SupportsSExt = true;
15340 break;
15341 case RISCVISD::FP_EXTEND_VL: {
15342 MVT NarrowEltVT =
15343 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
15344 if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15345 break;
15346 SupportsFPExt = true;
15347 break;
15348 }
15349 case ISD::SPLAT_VECTOR:
15350 case RISCVISD::VMV_V_X_VL:
15351 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15352 break;
15353 case RISCVISD::VFMV_V_F_VL: {
15354 MVT VT = OrigOperand.getSimpleValueType();
15355
15356 if (!OrigOperand.getOperand(0).isUndef())
15357 break;
15358
15359 SDValue Op = OrigOperand.getOperand(1);
15360 if (Op.getOpcode() != ISD::FP_EXTEND)
15361 break;
15362
15363 if (!isSupportedFPExtend(Root, Op.getOperand(0).getSimpleValueType(),
15364 Subtarget))
15365 break;
15366
15367 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15368 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
15369 if (NarrowSize != ScalarBits)
15370 break;
15371
15372 SupportsFPExt = true;
15373 break;
15374 }
15375 default:
15376 break;
15377 }
15378 }
15379
15380 /// Check if \p Root supports any extension folding combines.
15381 static bool isSupportedRoot(const SDNode *Root,
15382 const RISCVSubtarget &Subtarget) {
15383 switch (Root->getOpcode()) {
15384 case ISD::ADD:
15385 case ISD::SUB:
15386 case ISD::MUL: {
15387 return Root->getValueType(0).isScalableVector();
15388 }
15389 case ISD::OR: {
15390 return Root->getValueType(0).isScalableVector() &&
15391 Root->getFlags().hasDisjoint();
15392 }
15393 // Vector Widening Integer Add/Sub/Mul Instructions
15394 case RISCVISD::ADD_VL:
15395 case RISCVISD::MUL_VL:
15396 case RISCVISD::VWADD_W_VL:
15397 case RISCVISD::VWADDU_W_VL:
15398 case RISCVISD::SUB_VL:
15399 case RISCVISD::VWSUB_W_VL:
15400 case RISCVISD::VWSUBU_W_VL:
15401 // Vector Widening Floating-Point Add/Sub/Mul Instructions
15402 case RISCVISD::FADD_VL:
15403 case RISCVISD::FSUB_VL:
15404 case RISCVISD::FMUL_VL:
15405 case RISCVISD::VFWADD_W_VL:
15406 case RISCVISD::VFWSUB_W_VL:
15407 return true;
15408 case ISD::SHL:
15409 return Root->getValueType(0).isScalableVector() &&
15410 Subtarget.hasStdExtZvbb();
15411 case RISCVISD::SHL_VL:
15412 return Subtarget.hasStdExtZvbb();
15413 case RISCVISD::VFMADD_VL:
15414 case RISCVISD::VFMSUB_VL:
15415 case RISCVISD::VFNMADD_VL:
15416 case RISCVISD::VFNMSUB_VL:
15417 return true;
15418 default:
15419 return false;
15420 }
15421 }
15422
15423 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
15424 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
15425 const RISCVSubtarget &Subtarget) {
15426 assert(isSupportedRoot(Root, Subtarget) &&
15427 "Trying to build an helper with an "
15428 "unsupported root");
15429 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
15431 OrigOperand = Root->getOperand(OperandIdx);
15432
15433 unsigned Opc = Root->getOpcode();
15434 switch (Opc) {
15435 // We consider
15436 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
15437 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
15438 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
15439 case RISCVISD::VWADD_W_VL:
15440 case RISCVISD::VWADDU_W_VL:
15441 case RISCVISD::VWSUB_W_VL:
15442 case RISCVISD::VWSUBU_W_VL:
15443 case RISCVISD::VFWADD_W_VL:
15444 case RISCVISD::VFWSUB_W_VL:
15445 if (OperandIdx == 1) {
15446 SupportsZExt =
15447 Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
15448 SupportsSExt =
15449 Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
15450 SupportsFPExt =
15451 Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
15452 // There's no existing extension here, so we don't have to worry about
15453 // making sure it gets removed.
15454 EnforceOneUse = false;
15455 break;
15456 }
15457 [[fallthrough]];
15458 default:
15459 fillUpExtensionSupport(Root, DAG, Subtarget);
15460 break;
15461 }
15462 }
15463
15464 /// Helper function to get the Mask and VL from \p Root.
15465 static std::pair<SDValue, SDValue>
15466 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
15467 const RISCVSubtarget &Subtarget) {
15468 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
15469 switch (Root->getOpcode()) {
15470 case ISD::ADD:
15471 case ISD::SUB:
15472 case ISD::MUL:
15473 case ISD::OR:
15474 case ISD::SHL: {
15475 SDLoc DL(Root);
15476 MVT VT = Root->getSimpleValueType(0);
15477 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
15478 }
15479 default:
15480 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
15481 }
15482 }
15483
15484 /// Helper function to check if \p N is commutative with respect to the
15485 /// foldings that are supported by this class.
15486 static bool isCommutative(const SDNode *N) {
15487 switch (N->getOpcode()) {
15488 case ISD::ADD:
15489 case ISD::MUL:
15490 case ISD::OR:
15491 case RISCVISD::ADD_VL:
15492 case RISCVISD::MUL_VL:
15493 case RISCVISD::VWADD_W_VL:
15494 case RISCVISD::VWADDU_W_VL:
15495 case RISCVISD::FADD_VL:
15496 case RISCVISD::FMUL_VL:
15497 case RISCVISD::VFWADD_W_VL:
15498 case RISCVISD::VFMADD_VL:
15499 case RISCVISD::VFNMADD_VL:
15500 case RISCVISD::VFMSUB_VL:
15501 case RISCVISD::VFNMSUB_VL:
15502 return true;
15503 case ISD::SUB:
15504 case RISCVISD::SUB_VL:
15507 case RISCVISD::FSUB_VL:
15508 case RISCVISD::VFWSUB_W_VL:
15509 case ISD::SHL:
15510 case RISCVISD::SHL_VL:
15511 return false;
15512 default:
15513 llvm_unreachable("Unexpected opcode");
15514 }
15515 }
15516
15517 /// Get a list of combine to try for folding extensions in \p Root.
15518 /// Note that each returned CombineToTry function doesn't actually modify
15519 /// anything. Instead, they produce an optional CombineResult that, if not
15520 /// std::nullopt, needs to be materialized for the combine to be applied.
15521 /// \see CombineResult::materialize.
15522 /// If the related CombineToTry function returns std::nullopt, that means the
15523 /// combine didn't match.
15524 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
15525};
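// An illustrative end-to-end case for the helper above: for
//   add_vl (vsext_vl A), (vsext_vl B)
// both operands report SupportsSExt, so the combine can form vwadd_vl A, B on
// the narrow type; if only the second operand is an extension, e.g.
//   add_vl X, (vzext_vl B)
// the _W form vwaddu_w_vl X, B is used instead, keeping X at the wide type.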
15526
15527/// Helper structure that holds all the necessary information to materialize a
15528/// combine that does some extension folding.
15529struct CombineResult {
15530 /// Opcode to be generated when materializing the combine.
15531 unsigned TargetOpcode;
15532 // No value means no extension is needed.
15533 std::optional<ExtKind> LHSExt;
15534 std::optional<ExtKind> RHSExt;
15535 /// Root of the combine.
15536 SDNode *Root;
15537 /// LHS of the TargetOpcode.
15538 NodeExtensionHelper LHS;
15539 /// RHS of the TargetOpcode.
15540 NodeExtensionHelper RHS;
15541
15542 CombineResult(unsigned TargetOpcode, SDNode *Root,
15543 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15544 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15545 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15546 LHS(LHS), RHS(RHS) {}
15547
15548 /// Return a value that uses TargetOpcode and that can be used to replace
15549 /// Root.
15550 /// The actual replacement is *not* done in that method.
15551 SDValue materialize(SelectionDAG &DAG,
15552 const RISCVSubtarget &Subtarget) const {
15553 SDValue Mask, VL, Passthru;
15554 std::tie(Mask, VL) =
15555 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15556 switch (Root->getOpcode()) {
15557 default:
15558 Passthru = Root->getOperand(2);
15559 break;
15560 case ISD::ADD:
15561 case ISD::SUB:
15562 case ISD::MUL:
15563 case ISD::OR:
15564 case ISD::SHL:
15565 Passthru = DAG.getUNDEF(Root->getValueType(0));
15566 break;
15567 }
15568 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
15569 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15570 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15571 Passthru, Mask, VL);
15572 }
15573};
15574
15575/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15576/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15577/// are zext) and LHS and RHS can be folded into Root.
15578/// AllowExtMask defines which form `ext` can take in this pattern.
15579///
15580/// \note If the pattern can match with both zext and sext, the returned
15581/// CombineResult will feature the zext result.
15582///
15583/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15584/// can be used to apply the pattern.
15585static std::optional<CombineResult>
15586canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
15587 const NodeExtensionHelper &RHS,
15588 uint8_t AllowExtMask, SelectionDAG &DAG,
15589 const RISCVSubtarget &Subtarget) {
15590 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
15591 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15592 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
15593 /*RHSExt=*/{ExtKind::ZExt});
15594 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
15595 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15596 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15597 /*RHSExt=*/{ExtKind::SExt});
15598 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
15599 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15600 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
15601 /*RHSExt=*/{ExtKind::FPExt});
15602 return std::nullopt;
15603}
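// Illustrative walk-through of the helper above (a sketch, not upstream text):
// with Root = RISCVISD::ADD_VL and both operands reachable through zext, the
// ZExt branch fires first and materializes a VWADDU_VL (vwaddu.vv); if only
// sign extensions are available, the SExt branch yields VWADD_VL (vwadd.vv)
// instead. The ordering of the checks is what gives zext priority when both
// forms would match.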
15604
15605/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15606/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15607/// are zext) and LHS and RHS can be folded into Root.
15608///
15609/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15610/// can be used to apply the pattern.
15611static std::optional<CombineResult>
15612canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
15613 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15614 const RISCVSubtarget &Subtarget) {
15615 return canFoldToVWWithSameExtensionImpl(
15616 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15617 Subtarget);
15618}
15619
15620/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15621///
15622/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15623/// can be used to apply the pattern.
15624static std::optional<CombineResult>
15625canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
15626 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15627 const RISCVSubtarget &Subtarget) {
15628 if (RHS.SupportsFPExt)
15629 return CombineResult(
15630 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15631 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
15632
15633 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15634 // sext/zext?
15635 // Control this behavior behind an option (AllowSplatInVW_W) for testing
15636 // purposes.
15637 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
15638 return CombineResult(
15639 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15640 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
15641 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
15642 return CombineResult(
15643 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15644 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
15645 return std::nullopt;
15646}
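// Sketch of the _W form handled above (illustrative, not upstream text):
//   (add_vl wideY, (zext_vl narrowX))  -->  vwaddu.wv wideY, narrowX
// Only the RHS extension is folded away, which is why LHSExt stays
// std::nullopt; the splat restriction keeps the vwadd(u).wx form from being
// created unless the AllowSplatInVW_W testing flag is set.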
15647
15648/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15649///
15650/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15651/// can be used to apply the pattern.
15652static std::optional<CombineResult>
15653canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15654 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15655 const RISCVSubtarget &Subtarget) {
15656 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15657 Subtarget);
15658}
15659
15660/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15661///
15662/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15663/// can be used to apply the pattern.
15664static std::optional<CombineResult>
15665canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15666 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15667 const RISCVSubtarget &Subtarget) {
15668 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15669 Subtarget);
15670}
15671
15672/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
15673///
15674/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15675/// can be used to apply the pattern.
15676static std::optional<CombineResult>
15677canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15678 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15679 const RISCVSubtarget &Subtarget) {
15680 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15681 Subtarget);
15682}
15683
15684/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
15685///
15686/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15687/// can be used to apply the pattern.
15688static std::optional<CombineResult>
15689canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
15690 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15691 const RISCVSubtarget &Subtarget) {
15692
15693 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
15694 return std::nullopt;
15695 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
15696 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15697 /*RHSExt=*/{ExtKind::ZExt});
15698}
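// Illustrative pattern for the SU form (sketch, not upstream text):
//   (mul_vl (sext_vl a), (zext_vl b))  -->  vwmulsu.vv a, b
// i.e. a signed-by-unsigned widening multiply. The operand order matters
// here, so this helper only succeeds in the other direction after the caller
// swaps LHS/RHS on the second attempt for commutative roots.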
15699
15700SmallVector<NodeExtensionHelper::CombineToTry>
15701NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
15702 SmallVector<CombineToTry> Strategies;
15703 switch (Root->getOpcode()) {
15704 case ISD::ADD:
15705 case ISD::SUB:
15706 case ISD::OR:
15707 case RISCVISD::ADD_VL:
15708 case RISCVISD::SUB_VL:
15709 case RISCVISD::FADD_VL:
15710 case RISCVISD::FSUB_VL:
15711 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
15712 Strategies.push_back(canFoldToVWWithSameExtension);
15713    // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
15714 Strategies.push_back(canFoldToVW_W);
15715 break;
15716 case RISCVISD::FMUL_VL:
15721 Strategies.push_back(canFoldToVWWithSameExtension);
15722 break;
15723 case ISD::MUL:
15724 case RISCVISD::MUL_VL:
15725 // mul -> vwmul(u)
15726 Strategies.push_back(canFoldToVWWithSameExtension);
15727 // mul -> vwmulsu
15728 Strategies.push_back(canFoldToVW_SU);
15729 break;
15730 case ISD::SHL:
15731 case RISCVISD::SHL_VL:
15732 // shl -> vwsll
15733 Strategies.push_back(canFoldToVWWithZEXT);
15734 break;
15735  case RISCVISD::VWADD_W_VL:
15736  case RISCVISD::VWSUB_W_VL:
15737 // vwadd_w|vwsub_w -> vwadd|vwsub
15738 Strategies.push_back(canFoldToVWWithSEXT);
15739 break;
15740  case RISCVISD::VWADDU_W_VL:
15741  case RISCVISD::VWSUBU_W_VL:
15742 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
15743 Strategies.push_back(canFoldToVWWithZEXT);
15744 break;
15745  case RISCVISD::VFWADD_W_VL:
15746  case RISCVISD::VFWSUB_W_VL:
15747 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
15748 Strategies.push_back(canFoldToVWWithFPEXT);
15749 break;
15750 default:
15751 llvm_unreachable("Unexpected opcode");
15752 }
15753 return Strategies;
15754}
15755} // End anonymous namespace.
15756
15757/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
15758/// The supported combines are:
15759/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
15760/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
15761/// mul | mul_vl -> vwmul(u) | vwmul_su
15762/// shl | shl_vl -> vwsll
15763/// fadd_vl -> vfwadd | vfwadd_w
15764/// fsub_vl -> vfwsub | vfwsub_w
15765/// fmul_vl -> vfwmul
15766/// vwadd_w(u) -> vwadd(u)
15767/// vwsub_w(u) -> vwsub(u)
15768/// vfwadd_w -> vfwadd
15769/// vfwsub_w -> vfwsub
15770static SDValue combineOp_VLToVWOp_VL(SDNode *N,
15771                                     TargetLowering::DAGCombinerInfo &DCI,
15772 const RISCVSubtarget &Subtarget) {
15773 SelectionDAG &DAG = DCI.DAG;
15774 if (DCI.isBeforeLegalize())
15775 return SDValue();
15776
15777 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15778 return SDValue();
15779
15780 SmallVector<SDNode *> Worklist;
15781 SmallSet<SDNode *, 8> Inserted;
15782 Worklist.push_back(N);
15783 Inserted.insert(N);
15784 SmallVector<CombineResult> CombinesToApply;
15785
15786 while (!Worklist.empty()) {
15787 SDNode *Root = Worklist.pop_back_val();
15788
15789 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
15790 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
15791 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15792 &Inserted](const NodeExtensionHelper &Op) {
15793 if (Op.needToPromoteOtherUsers()) {
15794 for (SDUse &Use : Op.OrigOperand->uses()) {
15795 SDNode *TheUser = Use.getUser();
15796 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15797 return false;
15798 // We only support the first 2 operands of FMA.
15799 if (Use.getOperandNo() >= 2)
15800 return false;
15801 if (Inserted.insert(TheUser).second)
15802 Worklist.push_back(TheUser);
15803 }
15804 }
15805 return true;
15806 };
15807
15808    // Control the compile time by limiting the number of nodes we look at in
15809 // total.
15810 if (Inserted.size() > ExtensionMaxWebSize)
15811 return SDValue();
15812
15813    SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
15814 NodeExtensionHelper::getSupportedFoldings(Root);
15815
15816 assert(!FoldingStrategies.empty() && "Nothing to be folded");
15817 bool Matched = false;
15818 for (int Attempt = 0;
15819 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15820 ++Attempt) {
15821
15822 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15823 FoldingStrategies) {
15824 std::optional<CombineResult> Res =
15825 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
15826 if (Res) {
15827 Matched = true;
15828 CombinesToApply.push_back(*Res);
15829 // All the inputs that are extended need to be folded, otherwise
15830          // we would be leaving the old input (since it may still be used),
15831 // and the new one.
15832 if (Res->LHSExt.has_value())
15833 if (!AppendUsersIfNeeded(LHS))
15834 return SDValue();
15835 if (Res->RHSExt.has_value())
15836 if (!AppendUsersIfNeeded(RHS))
15837 return SDValue();
15838 break;
15839 }
15840 }
15841 std::swap(LHS, RHS);
15842 }
15843 // Right now we do an all or nothing approach.
15844 if (!Matched)
15845 return SDValue();
15846 }
15847 // Store the value for the replacement of the input node separately.
15848 SDValue InputRootReplacement;
15849 // We do the RAUW after we materialize all the combines, because some replaced
15850 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
15851 // some of these nodes may appear in the NodeExtensionHelpers of some of the
15852 // yet-to-be-visited CombinesToApply roots.
15853  SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
15854 ValuesToReplace.reserve(CombinesToApply.size());
15855 for (CombineResult Res : CombinesToApply) {
15856 SDValue NewValue = Res.materialize(DAG, Subtarget);
15857 if (!InputRootReplacement) {
15858 assert(Res.Root == N &&
15859 "First element is expected to be the current node");
15860 InputRootReplacement = NewValue;
15861 } else {
15862 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
15863 }
15864 }
15865 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15866 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
15867 DCI.AddToWorklist(OldNewValues.second.getNode());
15868 }
15869 return InputRootReplacement;
15870}
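// End-to-end example of the combine above (illustrative sketch):
//   t1 = zext_vl a       t2 = zext_vl b
//   t3 = add_vl t1, t2   t4 = add_vl t1, c
// Visiting t3 folds it into a widening add and, because t1's extension would
// be folded away, every other user of t1 (here t4) is pushed onto the
// worklist; only if all of them can be folded as well is anything rewritten.
// That "all or nothing" check keeps the old extend from staying alive next to
// the newly created widening nodes.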
15871
15872// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
15873// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
15874// y will be the Passthru and cond will be the Mask.
15875static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
15876 unsigned Opc = N->getOpcode();
15879
15880 SDValue Y = N->getOperand(0);
15881 SDValue MergeOp = N->getOperand(1);
15882 unsigned MergeOpc = MergeOp.getOpcode();
15883
15884 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
15885 return SDValue();
15886
15887 SDValue X = MergeOp->getOperand(1);
15888
15889 if (!MergeOp.hasOneUse())
15890 return SDValue();
15891
15892 // Passthru should be undef
15893 SDValue Passthru = N->getOperand(2);
15894 if (!Passthru.isUndef())
15895 return SDValue();
15896
15897 // Mask should be all ones
15898 SDValue Mask = N->getOperand(3);
15899 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
15900 return SDValue();
15901
15902 // False value of MergeOp should be all zeros
15903 SDValue Z = MergeOp->getOperand(2);
15904
15905 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
15906 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
15907 Z = Z.getOperand(1);
15908
15909 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15910 return SDValue();
15911
15912 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
15913 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15914 N->getFlags());
15915}
15916
15919 const RISCVSubtarget &Subtarget) {
15920 [[maybe_unused]] unsigned Opc = N->getOpcode();
15923
15924 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
15925 return V;
15926
15927 return combineVWADDSUBWSelect(N, DCI.DAG);
15928}
15929
15930// Helper function for performMemPairCombine.
15931// Try to combine the memory loads/stores LSNode1 and LSNode2
15932// into a single memory pair operation.
15933static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
15934 LSBaseSDNode *LSNode2, SDValue BasePtr,
15935 uint64_t Imm) {
15936  SmallPtrSet<const SDNode *, 32> Visited;
15937 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15938
15939 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15940 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15941 return SDValue();
15942
15943  MachineFunction &MF = DAG.getMachineFunction();
15944 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15945
15946 // The new operation has twice the width.
15947 MVT XLenVT = Subtarget.getXLenVT();
15948 EVT MemVT = LSNode1->getMemoryVT();
15949 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15950 MachineMemOperand *MMO = LSNode1->getMemOperand();
15951  MachineMemOperand *NewMMO = MF.getMachineMemOperand(
15952 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15953
15954 if (LSNode1->getOpcode() == ISD::LOAD) {
15955 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15956 unsigned Opcode;
15957 if (MemVT == MVT::i32)
15958 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
15959 else
15960 Opcode = RISCVISD::TH_LDD;
15961
15962 SDValue Res = DAG.getMemIntrinsicNode(
15963 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15964 {LSNode1->getChain(), BasePtr,
15965 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15966 NewMemVT, NewMMO);
15967
15968 SDValue Node1 =
15969 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15970 SDValue Node2 =
15971 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15972
15973 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15974 return Node1;
15975 } else {
15976 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15977
15978 SDValue Res = DAG.getMemIntrinsicNode(
15979 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15980 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15981 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15982 NewMemVT, NewMMO);
15983
15984 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15985 return Res;
15986 }
15987}
15988
15989// Try to combine two adjacent loads/stores to a single pair instruction from
15990// the XTHeadMemPair vendor extension.
15991static SDValue performMemPairCombine(SDNode *N,
15992                                     TargetLowering::DAGCombinerInfo &DCI) {
15993 SelectionDAG &DAG = DCI.DAG;
15994  MachineFunction &MF = DAG.getMachineFunction();
15995 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15996
15997 // Target does not support load/store pair.
15998 if (!Subtarget.hasVendorXTHeadMemPair())
15999 return SDValue();
16000
16001 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
16002 EVT MemVT = LSNode1->getMemoryVT();
16003 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
16004
16005 // No volatile, indexed or atomic loads/stores.
16006 if (!LSNode1->isSimple() || LSNode1->isIndexed())
16007 return SDValue();
16008
16009 // Function to get a base + constant representation from a memory value.
16010 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
16011 if (Ptr->getOpcode() == ISD::ADD)
16012 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
16013 return {Ptr->getOperand(0), C1->getZExtValue()};
16014 return {Ptr, 0};
16015 };
16016
16017 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
16018
16019 SDValue Chain = N->getOperand(0);
16020 for (SDUse &Use : Chain->uses()) {
16021 if (Use.getUser() != N && Use.getResNo() == 0 &&
16022 Use.getUser()->getOpcode() == N->getOpcode()) {
16023 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
16024
16025 // No volatile, indexed or atomic loads/stores.
16026 if (!LSNode2->isSimple() || LSNode2->isIndexed())
16027 continue;
16028
16029 // Check if LSNode1 and LSNode2 have the same type and extension.
16030 if (LSNode1->getOpcode() == ISD::LOAD)
16031 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
16032 cast<LoadSDNode>(LSNode1)->getExtensionType())
16033 continue;
16034
16035 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
16036 continue;
16037
16038 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
16039
16040      // Check if the base pointer is the same for both instructions.
16041 if (Base1 != Base2)
16042 continue;
16043
16044      // Check if the offsets match the XTHeadMemPair encoding constraints.
16045 bool Valid = false;
16046 if (MemVT == MVT::i32) {
16047 // Check for adjacent i32 values and a 2-bit index.
16048 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
16049 Valid = true;
16050 } else if (MemVT == MVT::i64) {
16051 // Check for adjacent i64 values and a 2-bit index.
16052 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
16053 Valid = true;
16054 }
16055
16056 if (!Valid)
16057 continue;
16058
16059 // Try to combine.
16060 if (SDValue Res =
16061 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
16062 return Res;
16063 }
16064 }
16065
16066 return SDValue();
16067}
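// Worked example of the offset constraint above (illustrative sketch): for
// two i32 loads at base+8 and base+12, Offset1 + 4 == Offset2 and
// isShiftedUInt<2, 3>(8) holds (8 == 1 << 3 with a 2-bit index), so the pair
// is rewritten into a single XTHeadMemPair th.lwd that yields both values.
// An offset such as 36 is not a shifted 2-bit value and fails the encoding
// check, leaving the original loads untouched.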
16068
16069// Fold
16070// (fp_to_int (froundeven X)) -> fcvt X, rne
16071// (fp_to_int (ftrunc X)) -> fcvt X, rtz
16072// (fp_to_int (ffloor X)) -> fcvt X, rdn
16073// (fp_to_int (fceil X)) -> fcvt X, rup
16074// (fp_to_int (fround X)) -> fcvt X, rmm
16075// (fp_to_int (frint X)) -> fcvt X
16078 const RISCVSubtarget &Subtarget) {
16079 SelectionDAG &DAG = DCI.DAG;
16080 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16081 MVT XLenVT = Subtarget.getXLenVT();
16082
16083 SDValue Src = N->getOperand(0);
16084
16085 // Don't do this for strict-fp Src.
16086 if (Src->isStrictFPOpcode())
16087 return SDValue();
16088
16089 // Ensure the FP type is legal.
16090 if (!TLI.isTypeLegal(Src.getValueType()))
16091 return SDValue();
16092
16093 // Don't do this for f16 with Zfhmin and not Zfh.
16094 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16095 return SDValue();
16096
16097 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16098 // If the result is invalid, we didn't find a foldable instruction.
16099 if (FRM == RISCVFPRndMode::Invalid)
16100 return SDValue();
16101
16102 SDLoc DL(N);
16103 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
16104 EVT VT = N->getValueType(0);
16105
16106 if (VT.isVector() && TLI.isTypeLegal(VT)) {
16107 MVT SrcVT = Src.getSimpleValueType();
16108 MVT SrcContainerVT = SrcVT;
16109 MVT ContainerVT = VT.getSimpleVT();
16110 SDValue XVal = Src.getOperand(0);
16111
16112 // For widening and narrowing conversions we just combine it into a
16113 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
16114 // end up getting lowered to their appropriate pseudo instructions based on
16115 // their operand types
16116 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
16117 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
16118 return SDValue();
16119
16120 // Make fixed-length vectors scalable first
16121 if (SrcVT.isFixedLengthVector()) {
16122 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
16123 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
16124 ContainerVT =
16125 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
16126 }
16127
16128 auto [Mask, VL] =
16129 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
16130
16131 SDValue FpToInt;
16132 if (FRM == RISCVFPRndMode::RTZ) {
16133 // Use the dedicated trunc static rounding mode if we're truncating so we
16134 // don't need to generate calls to fsrmi/fsrm
16135 unsigned Opc =
16136          IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
16137 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
16138 } else {
16139 unsigned Opc =
16140          IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
16141 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
16142 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
16143 }
16144
16145 // If converted from fixed-length to scalable, convert back
16146 if (VT.isFixedLengthVector())
16147 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
16148
16149 return FpToInt;
16150 }
16151
16152 // Only handle XLen or i32 types. Other types narrower than XLen will
16153 // eventually be legalized to XLenVT.
16154 if (VT != MVT::i32 && VT != XLenVT)
16155 return SDValue();
16156
16157 unsigned Opc;
16158 if (VT == XLenVT)
16159 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16160 else
16161    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16162
16163 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
16164 DAG.getTargetConstant(FRM, DL, XLenVT));
16165 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
16166}
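// Illustrative result of the scalar path above (sketch): on RV64,
//   (i64 fp_to_sint (ffloor f64:x))  -->  (FCVT_X x, rdn)
// i.e. a single fcvt with the static rounding mode "rdn", instead of rounding
// to a floating-point integer first and converting afterwards.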
16167
16168// Fold
16169// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
16170// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
16171// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
16172// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
16173// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
16174// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
16177 const RISCVSubtarget &Subtarget) {
16178 SelectionDAG &DAG = DCI.DAG;
16179 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16180 MVT XLenVT = Subtarget.getXLenVT();
16181
16182 // Only handle XLen types. Other types narrower than XLen will eventually be
16183 // legalized to XLenVT.
16184 EVT DstVT = N->getValueType(0);
16185 if (DstVT != XLenVT)
16186 return SDValue();
16187
16188 SDValue Src = N->getOperand(0);
16189
16190 // Don't do this for strict-fp Src.
16191 if (Src->isStrictFPOpcode())
16192 return SDValue();
16193
16194 // Ensure the FP type is also legal.
16195 if (!TLI.isTypeLegal(Src.getValueType()))
16196 return SDValue();
16197
16198 // Don't do this for f16 with Zfhmin and not Zfh.
16199 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16200 return SDValue();
16201
16202 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16203
16204 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16205 if (FRM == RISCVFPRndMode::Invalid)
16206 return SDValue();
16207
16208 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
16209
16210 unsigned Opc;
16211 if (SatVT == DstVT)
16212 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16213 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
16214    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16215 else
16216 return SDValue();
16217 // FIXME: Support other SatVTs by clamping before or after the conversion.
16218
16219 Src = Src.getOperand(0);
16220
16221 SDLoc DL(N);
16222 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
16223 DAG.getTargetConstant(FRM, DL, XLenVT));
16224
16225 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
16226 // extend.
16227 if (Opc == RISCVISD::FCVT_WU_RV64)
16228 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
16229
16230 // RISC-V FP-to-int conversions saturate to the destination register size, but
16231 // don't produce 0 for nan.
16232 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
16233 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
16234}
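// Illustrative shape of the saturating fold above (sketch):
//   (i64 fp_to_sint_sat (ftrunc f32:x))
//     --> (select_cc x, x, 0, (FCVT_X x, rtz), setuo)
// The fcvt already saturates out-of-range inputs to the register bounds; the
// unordered self-compare only exists to force the NaN case to 0, which the
// hardware conversion does not do on its own.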
16235
16236// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
16237// smaller than XLenVT.
16239 const RISCVSubtarget &Subtarget) {
16240 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
16241
16242 SDValue Src = N->getOperand(0);
16243 if (Src.getOpcode() != ISD::BSWAP)
16244 return SDValue();
16245
16246 EVT VT = N->getValueType(0);
16247 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
16248 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
16249 return SDValue();
16250
16251 SDLoc DL(N);
16252 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
16253}
16254
16256 const RISCVSubtarget &Subtarget) {
16257 // Fold:
16258 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
16259
16260 // Check if its first operand is a vp.load.
16261 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
16262 if (!VPLoad)
16263 return SDValue();
16264
16265 EVT LoadVT = VPLoad->getValueType(0);
16266 // We do not have a strided_load version for masks, and the evl of vp.reverse
16267 // and vp.load should always be the same.
16268 if (!LoadVT.getVectorElementType().isByteSized() ||
16269 N->getOperand(2) != VPLoad->getVectorLength() ||
16270 !N->getOperand(0).hasOneUse())
16271 return SDValue();
16272
16273 // Check if the mask of outer vp.reverse are all 1's.
16274 if (!isOneOrOneSplat(N->getOperand(1)))
16275 return SDValue();
16276
16277 SDValue LoadMask = VPLoad->getMask();
16278 // If Mask is all ones, then load is unmasked and can be reversed.
16279 if (!isOneOrOneSplat(LoadMask)) {
16280 // If the mask is not all ones, we can reverse the load if the mask was also
16281 // reversed by an unmasked vp.reverse with the same EVL.
16282 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16283 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
16284 LoadMask.getOperand(2) != VPLoad->getVectorLength())
16285 return SDValue();
16286 LoadMask = LoadMask.getOperand(0);
16287 }
16288
16289 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
16290 SDLoc DL(N);
16291 MVT XLenVT = Subtarget.getXLenVT();
16292 SDValue NumElem = VPLoad->getVectorLength();
16293 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
16294
16295 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
16296 DAG.getConstant(1, DL, XLenVT));
16297 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
16298 DAG.getConstant(ElemWidthByte, DL, XLenVT));
16299 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
16300 SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT);
16301
16302  MachineFunction &MF = DAG.getMachineFunction();
16303 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
16304  MachineMemOperand *MMO = MF.getMachineMemOperand(
16305 PtrInfo, VPLoad->getMemOperand()->getFlags(),
16306 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
16307
16308 SDValue Ret = DAG.getStridedLoadVP(
16309 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
16310 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
16311
16312 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
16313
16314 return Ret;
16315}
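// Worked address math for the fold above (illustrative sketch): for a vp.load
// of i32 elements with EVL = 4 at address A, the reversed strided load starts
// at Base = A + (4 - 1) * 4 = A + 12 and uses Stride = -4, so it reads
// A+12, A+8, A+4, A+0 -- the same elements the vp.reverse would have
// produced, without materializing the forward load first.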
16316
16318 const RISCVSubtarget &Subtarget) {
16319 // Fold:
16320 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
16321 // -1, MASK)
16322 auto *VPStore = cast<VPStoreSDNode>(N);
16323
16324 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
16325 return SDValue();
16326
16327 SDValue VPReverse = VPStore->getValue();
16328 EVT ReverseVT = VPReverse->getValueType(0);
16329
16330 // We do not have a strided_store version for masks, and the evl of vp.reverse
16331 // and vp.store should always be the same.
16332 if (!ReverseVT.getVectorElementType().isByteSized() ||
16333 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
16334 !VPReverse.hasOneUse())
16335 return SDValue();
16336
16337 SDValue StoreMask = VPStore->getMask();
16338  // If Mask is all ones, then the store is unmasked and can be reversed.
16339 if (!isOneOrOneSplat(StoreMask)) {
16340 // If the mask is not all ones, we can reverse the store if the mask was
16341 // also reversed by an unmasked vp.reverse with the same EVL.
16342 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16343 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
16344 StoreMask.getOperand(2) != VPStore->getVectorLength())
16345 return SDValue();
16346 StoreMask = StoreMask.getOperand(0);
16347 }
16348
16349 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
16350 SDLoc DL(N);
16351 MVT XLenVT = Subtarget.getXLenVT();
16352 SDValue NumElem = VPStore->getVectorLength();
16353 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
16354
16355 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
16356 DAG.getConstant(1, DL, XLenVT));
16357 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
16358 DAG.getConstant(ElemWidthByte, DL, XLenVT));
16359 SDValue Base =
16360 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
16361 SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT);
16362
16363  MachineFunction &MF = DAG.getMachineFunction();
16364 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
16365  MachineMemOperand *MMO = MF.getMachineMemOperand(
16366 PtrInfo, VPStore->getMemOperand()->getFlags(),
16367 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
16368
16369 return DAG.getStridedStoreVP(
16370 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
16371 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
16372 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
16373 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
16374}
16375
16376// Convert from one FMA opcode to another based on whether we are negating the
16377// multiply result and/or the accumulator.
16378// NOTE: Only supports RVV operations with VL.
16379static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
16380 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
16381 if (NegMul) {
16382 // clang-format off
16383 switch (Opcode) {
16384 default: llvm_unreachable("Unexpected opcode");
16385 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16386 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16387 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16388 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16393 }
16394 // clang-format on
16395 }
16396
16397 // Negating the accumulator changes ADD<->SUB.
16398 if (NegAcc) {
16399 // clang-format off
16400 switch (Opcode) {
16401 default: llvm_unreachable("Unexpected opcode");
16402 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16403 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16404 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16405 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16410 }
16411 // clang-format on
16412 }
16413
16414 return Opcode;
16415}
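// Quick reference for the opcode remapping above (illustrative):
//   negateFMAOpcode(VFMADD_VL, /*NegMul=*/true,  /*NegAcc=*/false) == VFNMSUB_VL
//   negateFMAOpcode(VFMADD_VL, /*NegMul=*/false, /*NegAcc=*/true)  == VFMSUB_VL
//   negateFMAOpcode(VFMADD_VL, /*NegMul=*/true,  /*NegAcc=*/true)  == VFNMADD_VL
// matching -(a*b)+c, (a*b)-c and -(a*b)-c respectively.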
16416
16418 // Fold FNEG_VL into FMA opcodes.
16419 // The first operand of strict-fp is chain.
16420 bool IsStrict =
16421 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
16422 unsigned Offset = IsStrict ? 1 : 0;
16423 SDValue A = N->getOperand(0 + Offset);
16424 SDValue B = N->getOperand(1 + Offset);
16425 SDValue C = N->getOperand(2 + Offset);
16426 SDValue Mask = N->getOperand(3 + Offset);
16427 SDValue VL = N->getOperand(4 + Offset);
16428
16429 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
16430 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
16431 V.getOperand(2) == VL) {
16432 // Return the negated input.
16433 V = V.getOperand(0);
16434 return true;
16435 }
16436
16437 return false;
16438 };
16439
16440 bool NegA = invertIfNegative(A);
16441 bool NegB = invertIfNegative(B);
16442 bool NegC = invertIfNegative(C);
16443
16444 // If no operands are negated, we're done.
16445 if (!NegA && !NegB && !NegC)
16446 return SDValue();
16447
16448 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
16449 if (IsStrict)
16450 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
16451 {N->getOperand(0), A, B, C, Mask, VL});
16452 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
16453 VL);
16454}
16455
16458 const RISCVSubtarget &Subtarget) {
16459 SelectionDAG &DAG = DCI.DAG;
16460
16462 return V;
16463
16464 // FIXME: Ignore strict opcodes for now.
16465 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
16466 return SDValue();
16467
16468 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16469}
16470
16472 const RISCVSubtarget &Subtarget) {
16473 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
16474
16475 EVT VT = N->getValueType(0);
16476
16477 if (VT != Subtarget.getXLenVT())
16478 return SDValue();
16479
16480 if (!isa<ConstantSDNode>(N->getOperand(1)))
16481 return SDValue();
16482 uint64_t ShAmt = N->getConstantOperandVal(1);
16483
16484 SDValue N0 = N->getOperand(0);
16485
16486 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
16487 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
16488 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
16489 unsigned ExtSize =
16490 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
16491 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
16492 N0.getOperand(0).hasOneUse() &&
16493 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
16494 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
16495 if (LShAmt < ExtSize) {
16496 unsigned Size = VT.getSizeInBits();
16497 SDLoc ShlDL(N0.getOperand(0));
16498 SDValue Shl =
16499 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
16500 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
16501 SDLoc DL(N);
16502 return DAG.getNode(ISD::SRA, DL, VT, Shl,
16503 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
16504 }
16505 }
16506 }
16507
16508 if (ShAmt > 32 || VT != MVT::i64)
16509 return SDValue();
16510
16511 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
16512 // FIXME: Should this be a generic combine? There's a similar combine on X86.
16513 //
16514 // Also try these folds where an add or sub is in the middle.
16515 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
16516 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
16517 SDValue Shl;
16518 ConstantSDNode *AddC = nullptr;
16519
16520 // We might have an ADD or SUB between the SRA and SHL.
16521 bool IsAdd = N0.getOpcode() == ISD::ADD;
16522 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
16523 // Other operand needs to be a constant we can modify.
16524 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
16525 if (!AddC)
16526 return SDValue();
16527
16528 // AddC needs to have at least 32 trailing zeros.
16529 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
16530 return SDValue();
16531
16532 // All users should be a shift by constant less than or equal to 32. This
16533 // ensures we'll do this optimization for each of them to produce an
16534 // add/sub+sext_inreg they can all share.
16535 for (SDNode *U : N0->users()) {
16536 if (U->getOpcode() != ISD::SRA ||
16537 !isa<ConstantSDNode>(U->getOperand(1)) ||
16538 U->getConstantOperandVal(1) > 32)
16539 return SDValue();
16540 }
16541
16542 Shl = N0.getOperand(IsAdd ? 0 : 1);
16543 } else {
16544 // Not an ADD or SUB.
16545 Shl = N0;
16546 }
16547
16548 // Look for a shift left by 32.
16549 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
16550 Shl.getConstantOperandVal(1) != 32)
16551 return SDValue();
16552
16553  // If we didn't look through an add/sub, then the shl should have one use.
16554 // If we did look through an add/sub, the sext_inreg we create is free so
16555 // we're only creating 2 new instructions. It's enough to only remove the
16556 // original sra+add/sub.
16557 if (!AddC && !Shl.hasOneUse())
16558 return SDValue();
16559
16560 SDLoc DL(N);
16561 SDValue In = Shl.getOperand(0);
16562
16563 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
16564 // constant.
16565 if (AddC) {
16566 SDValue ShiftedAddC =
16567 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
16568 if (IsAdd)
16569 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
16570 else
16571 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
16572 }
16573
16574 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
16575 DAG.getValueType(MVT::i32));
16576 if (ShAmt == 32)
16577 return SExt;
16578
16579 return DAG.getNode(
16580 ISD::SHL, DL, MVT::i64, SExt,
16581 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
16582}
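// Worked example of the shift fold above (illustrative sketch): with X:i64,
//   (sra (shl X, 32), 20)
// becomes (shl (sext_inreg X, i32), 12), since ShAmt = 20 gives 32 - 20 = 12.
// The sext_inreg then selects to a sext.w (or may be folded away entirely if
// X is already sign extended), which is why the slli+srai pair is worth
// rewriting.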
16583
16584// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
16585// the result is used as the condition of a br_cc or select_cc we can invert,
16586// inverting the setcc is free, and Z is 0/1. Caller will invert the
16587// br_cc/select_cc.
16589 bool IsAnd = Cond.getOpcode() == ISD::AND;
16590 if (!IsAnd && Cond.getOpcode() != ISD::OR)
16591 return SDValue();
16592
16593 if (!Cond.hasOneUse())
16594 return SDValue();
16595
16596 SDValue Setcc = Cond.getOperand(0);
16597 SDValue Xor = Cond.getOperand(1);
16598 // Canonicalize setcc to LHS.
16599 if (Setcc.getOpcode() != ISD::SETCC)
16600 std::swap(Setcc, Xor);
16601 // LHS should be a setcc and RHS should be an xor.
16602 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
16603 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
16604 return SDValue();
16605
16606 // If the condition is an And, SimplifyDemandedBits may have changed
16607 // (xor Z, 1) to (not Z).
16608 SDValue Xor1 = Xor.getOperand(1);
16609 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
16610 return SDValue();
16611
16612 EVT VT = Cond.getValueType();
16613 SDValue Xor0 = Xor.getOperand(0);
16614
16615 // The LHS of the xor needs to be 0/1.
16617 if (!DAG.MaskedValueIsZero(Xor0, Mask))
16618 return SDValue();
16619
16620 // We can only invert integer setccs.
16621 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
16622 if (!SetCCOpVT.isScalarInteger())
16623 return SDValue();
16624
16625 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
16626 if (ISD::isIntEqualitySetCC(CCVal)) {
16627 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16628 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
16629 Setcc.getOperand(1), CCVal);
16630 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
16631 // Invert (setlt 0, X) by converting to (setlt X, 1).
16632 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
16633 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
16634 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
16635    // Invert (setlt X, 1) by converting to (setlt 0, X).
16636 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
16637 DAG.getConstant(0, SDLoc(Setcc), VT),
16638 Setcc.getOperand(0), CCVal);
16639 } else
16640 return SDValue();
16641
16642 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16643 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
16644}
16645
16646// Perform common combines for BR_CC and SELECT_CC conditions.
16647static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
16648 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
16649 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16650
16651  // Since an arithmetic right shift always preserves the sign,
16652  // the shift can be omitted.
16653 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
16654 // setge (sra X, N), 0 -> setge X, 0
16655 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
16656 LHS.getOpcode() == ISD::SRA) {
16657 LHS = LHS.getOperand(0);
16658 return true;
16659 }
16660
16661 if (!ISD::isIntEqualitySetCC(CCVal))
16662 return false;
16663
16664 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
16665 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
16666 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
16667 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
16668 // If we're looking for eq 0 instead of ne 0, we need to invert the
16669 // condition.
16670 bool Invert = CCVal == ISD::SETEQ;
16671 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
16672 if (Invert)
16673 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16674
16675 RHS = LHS.getOperand(1);
16676 LHS = LHS.getOperand(0);
16677 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
16678
16679 CC = DAG.getCondCode(CCVal);
16680 return true;
16681 }
16682
16683 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
16684 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
16685 RHS = LHS.getOperand(1);
16686 LHS = LHS.getOperand(0);
16687 return true;
16688 }
16689
16690 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
16691 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
16692 LHS.getOperand(1).getOpcode() == ISD::Constant) {
16693 SDValue LHS0 = LHS.getOperand(0);
16694 if (LHS0.getOpcode() == ISD::AND &&
16695 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
16696 uint64_t Mask = LHS0.getConstantOperandVal(1);
16697 uint64_t ShAmt = LHS.getConstantOperandVal(1);
16698 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
16699 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
16700 CC = DAG.getCondCode(CCVal);
16701
16702 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
16703 LHS = LHS0.getOperand(0);
16704 if (ShAmt != 0)
16705 LHS =
16706 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
16707 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
16708 return true;
16709 }
16710 }
16711 }
16712
16713  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
16714 // This can occur when legalizing some floating point comparisons.
16715 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
16716 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
16717 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16718 CC = DAG.getCondCode(CCVal);
16719 RHS = DAG.getConstant(0, DL, LHS.getValueType());
16720 return true;
16721 }
16722
16723 if (isNullConstant(RHS)) {
16724 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
16725 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16726 CC = DAG.getCondCode(CCVal);
16727 LHS = NewCond;
16728 return true;
16729 }
16730 }
16731
16732 return false;
16733}
16734
16735// Fold
16736// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
16737// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
16738// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
16739// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
16741 SDValue TrueVal, SDValue FalseVal,
16742 bool Swapped) {
16743 bool Commutative = true;
16744 unsigned Opc = TrueVal.getOpcode();
16745 switch (Opc) {
16746 default:
16747 return SDValue();
16748 case ISD::SHL:
16749 case ISD::SRA:
16750 case ISD::SRL:
16751 case ISD::SUB:
16752 Commutative = false;
16753 break;
16754 case ISD::ADD:
16755 case ISD::OR:
16756 case ISD::XOR:
16757 break;
16758 }
16759
16760 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
16761 return SDValue();
16762
16763 unsigned OpToFold;
16764 if (FalseVal == TrueVal.getOperand(0))
16765 OpToFold = 0;
16766 else if (Commutative && FalseVal == TrueVal.getOperand(1))
16767 OpToFold = 1;
16768 else
16769 return SDValue();
16770
16771 EVT VT = N->getValueType(0);
16772 SDLoc DL(N);
16773 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
16774 EVT OtherOpVT = OtherOp.getValueType();
16775 SDValue IdentityOperand =
16776 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
16777 if (!Commutative)
16778 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
16779 assert(IdentityOperand && "No identity operand!");
16780
16781 if (Swapped)
16782 std::swap(OtherOp, IdentityOperand);
16783 SDValue NewSel =
16784 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
16785 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
16786}
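// Note on the non-commutative cases above (shl/srl/sra/sub): the constant 0
// is a right-identity for them (y >> 0 == y, y - 0 == y), which is why
// IdentityOperand is forced to zero instead of using getNeutralElement; e.g.
//   (select c, (srl y, x), y)  -->  (srl y, (select c, x, 0))
// is still correct even though srl itself is not commutative.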
16787
16788// This tries to get rid of `select` and `icmp` that are being used to handle
16789// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
16791 SDValue Cond = N->getOperand(0);
16792
16793 // This represents either CTTZ or CTLZ instruction.
16794 SDValue CountZeroes;
16795
16796 SDValue ValOnZero;
16797
16798 if (Cond.getOpcode() != ISD::SETCC)
16799 return SDValue();
16800
16801 if (!isNullConstant(Cond->getOperand(1)))
16802 return SDValue();
16803
16804 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
16805 if (CCVal == ISD::CondCode::SETEQ) {
16806 CountZeroes = N->getOperand(2);
16807 ValOnZero = N->getOperand(1);
16808 } else if (CCVal == ISD::CondCode::SETNE) {
16809 CountZeroes = N->getOperand(1);
16810 ValOnZero = N->getOperand(2);
16811 } else {
16812 return SDValue();
16813 }
16814
16815 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
16816 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
16817 CountZeroes = CountZeroes.getOperand(0);
16818
16819 if (CountZeroes.getOpcode() != ISD::CTTZ &&
16820 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
16821 CountZeroes.getOpcode() != ISD::CTLZ &&
16822 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
16823 return SDValue();
16824
16825 if (!isNullConstant(ValOnZero))
16826 return SDValue();
16827
16828 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
16829 if (Cond->getOperand(0) != CountZeroesArgument)
16830 return SDValue();
16831
16832 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
16833 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
16834 CountZeroes.getValueType(), CountZeroesArgument);
16835 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
16836 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
16837 CountZeroes.getValueType(), CountZeroesArgument);
16838 }
16839
16840 unsigned BitWidth = CountZeroes.getValueSizeInBits();
16841 SDValue BitWidthMinusOne =
16842 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
16843
16844 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
16845 CountZeroes, BitWidthMinusOne);
16846 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
16847}
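// Worked example of the fold above (illustrative sketch), for i64:
//   (select (x == 0), 0, (cttz x))  -->  (and (cttz x), 63)
// cttz(0) is defined as the bit width (64), and 64 & 63 == 0, so the masked
// count already yields the value the select was guarding for; the compare
// and select disappear.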
16848
16850 const RISCVSubtarget &Subtarget) {
16851 SDValue Cond = N->getOperand(0);
16852 SDValue True = N->getOperand(1);
16853 SDValue False = N->getOperand(2);
16854 SDLoc DL(N);
16855 EVT VT = N->getValueType(0);
16856 EVT CondVT = Cond.getValueType();
16857
16858 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
16859 return SDValue();
16860
16861  // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
16862  // BEXTI, where C is a power of 2.
16863 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
16864 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
16865 SDValue LHS = Cond.getOperand(0);
16866 SDValue RHS = Cond.getOperand(1);
16867 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16868 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
16869 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
16870 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
16871 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
16872 return DAG.getSelect(DL, VT,
16873 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
16874 False, True);
16875 }
16876 }
16877 return SDValue();
16878}
16879
16880static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
16881 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
16882 return false;
16883
16884 SwapCC = false;
16885 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
16886 std::swap(TrueVal, FalseVal);
16887 SwapCC = true;
16888 }
16889
16890 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
16891 return false;
16892
16893 SDValue A = FalseVal.getOperand(0);
16894 SDValue B = FalseVal.getOperand(1);
16895 // Add is commutative, so check both orders
16896 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
16897 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
16898}
16899
16900/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
16901/// This allows us to match a vadd.vv fed by a masked vrsub, which reduces
16902/// register pressure over the add followed by masked vsub sequence.
16904 SDLoc DL(N);
16905 EVT VT = N->getValueType(0);
16906 SDValue CC = N->getOperand(0);
16907 SDValue TrueVal = N->getOperand(1);
16908 SDValue FalseVal = N->getOperand(2);
16909
16910 bool SwapCC;
16911 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
16912 return SDValue();
16913
16914 SDValue Sub = SwapCC ? TrueVal : FalseVal;
16915 SDValue A = Sub.getOperand(0);
16916 SDValue B = Sub.getOperand(1);
16917
16918 // Arrange the select such that we can match a masked
16919 // vrsub.vi to perform the conditional negate
16920 SDValue NegB = DAG.getNegative(B, DL, VT);
16921 if (!SwapCC)
16922 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
16923 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
16924 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
16925}
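// Illustrative rewrite performed above (sketch):
//   (vselect m, (add a, b), (sub a, b))  -->  (add a, (vselect m, b, -b))
// which in turn matches an unmasked vadd.vv fed by a masked vrsub (the
// conditional negate), instead of an add plus a masked subtract that each
// need their own copy of the destination value.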
16926
16928 const RISCVSubtarget &Subtarget) {
16929 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
16930 return Folded;
16931
16932 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
16933 return V;
16934
16935 if (Subtarget.hasConditionalMoveFusion())
16936 return SDValue();
16937
16938 SDValue TrueVal = N->getOperand(1);
16939 SDValue FalseVal = N->getOperand(2);
16940 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
16941 return V;
16942 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
16943}
16944
16945/// If we have a build_vector where each lane is binop X, C, where C
16946/// is a constant (but not necessarily the same constant on all lanes),
16947/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
16948/// We assume that materializing a constant build vector will be no more
16949/// expensive than performing O(n) binops.
16951 const RISCVSubtarget &Subtarget,
16952 const RISCVTargetLowering &TLI) {
16953 SDLoc DL(N);
16954 EVT VT = N->getValueType(0);
16955
16956 assert(!VT.isScalableVector() && "unexpected build vector");
16957
16958 if (VT.getVectorNumElements() == 1)
16959 return SDValue();
16960
16961 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
16962 if (!TLI.isBinOp(Opcode))
16963 return SDValue();
16964
16965 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
16966 return SDValue();
16967
16968 // This BUILD_VECTOR involves an implicit truncation, and sinking
16969 // truncates through binops is non-trivial.
16970 if (N->op_begin()->getValueType() != VT.getVectorElementType())
16971 return SDValue();
16972
16973 SmallVector<SDValue> LHSOps;
16974 SmallVector<SDValue> RHSOps;
16975 for (SDValue Op : N->ops()) {
16976 if (Op.isUndef()) {
16977 // We can't form a divide or remainder from undef.
16978 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
16979 return SDValue();
16980
16981 LHSOps.push_back(Op);
16982 RHSOps.push_back(Op);
16983 continue;
16984 }
16985
16986    // TODO: We can handle operations which have a neutral rhs value
16987 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
16988 // of profit in a more explicit manner.
16989 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
16990 return SDValue();
16991
16992 LHSOps.push_back(Op.getOperand(0));
16993 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
16994 !isa<ConstantFPSDNode>(Op.getOperand(1)))
16995 return SDValue();
16996 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16997 // have different LHS and RHS types.
16998 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
16999 return SDValue();
17000
17001 RHSOps.push_back(Op.getOperand(1));
17002 }
17003
17004 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
17005 DAG.getBuildVector(VT, DL, RHSOps));
17006}
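// Illustrative instance of the build_vector fold above (sketch):
//   build_vector (add x0, 1), (add x1, 2), (add x2, 3), (add x3, 4)
//     --> add (build_vector x0, x1, x2, x3), (build_vector 1, 2, 3, 4)
// The constant operand vector is materialized once, so a single vector add
// replaces the per-element scalar adds that previously fed the vector build.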
17007
17009 const RISCVSubtarget &Subtarget,
17010 const RISCVTargetLowering &TLI) {
17011 SDValue InVec = N->getOperand(0);
17012 SDValue InVal = N->getOperand(1);
17013 SDValue EltNo = N->getOperand(2);
17014 SDLoc DL(N);
17015
17016 EVT VT = InVec.getValueType();
17017 if (VT.isScalableVector())
17018 return SDValue();
17019
17020 if (!InVec.hasOneUse())
17021 return SDValue();
17022
17023 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
17024 // move the insert_vector_elts into the arms of the binop. Note that
17025 // the new RHS must be a constant.
17026 const unsigned InVecOpcode = InVec->getOpcode();
17027 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
17028 InVal.hasOneUse()) {
17029 SDValue InVecLHS = InVec->getOperand(0);
17030 SDValue InVecRHS = InVec->getOperand(1);
17031 SDValue InValLHS = InVal->getOperand(0);
17032 SDValue InValRHS = InVal->getOperand(1);
17033
17034    if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
17035 return SDValue();
17036 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
17037 return SDValue();
17038 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
17039 // have different LHS and RHS types.
17040 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
17041 return SDValue();
17042    SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
17043 InVecLHS, InValLHS, EltNo);
17044    SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
17045 InVecRHS, InValRHS, EltNo);
17046 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
17047 }
17048
17049 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
17050 // move the insert_vector_elt to the source operand of the concat_vector.
17051 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
17052 return SDValue();
17053
17054 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17055 if (!IndexC)
17056 return SDValue();
17057 unsigned Elt = IndexC->getZExtValue();
17058
17059 EVT ConcatVT = InVec.getOperand(0).getValueType();
17060 if (ConcatVT.getVectorElementType() != InVal.getValueType())
17061 return SDValue();
17062 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
17063 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
17064
17065 unsigned ConcatOpIdx = Elt / ConcatNumElts;
17066 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
17067 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
17068 ConcatOp, InVal, NewIdx);
17069
17070 SmallVector<SDValue> ConcatOps;
17071 ConcatOps.append(InVec->op_begin(), InVec->op_end());
17072 ConcatOps[ConcatOpIdx] = ConcatOp;
17073 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17074}
17075
17076// If we're concatenating a series of vector loads like
17077// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
17078// Then we can turn this into a strided load by widening the vector elements
17079// vlse32 p, stride=n
17081 const RISCVSubtarget &Subtarget,
17082 const RISCVTargetLowering &TLI) {
17083 SDLoc DL(N);
17084 EVT VT = N->getValueType(0);
17085
17086 // Only perform this combine on legal MVTs.
17087 if (!TLI.isTypeLegal(VT))
17088 return SDValue();
17089
17090 // TODO: Potentially extend this to scalable vectors
17091 if (VT.isScalableVector())
17092 return SDValue();
17093
17094 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
17095 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
17096 !SDValue(BaseLd, 0).hasOneUse())
17097 return SDValue();
17098
17099 EVT BaseLdVT = BaseLd->getValueType(0);
17100
17101 // Go through the loads and check that they're strided
17102  SmallVector<LoadSDNode *> Lds;
17103 Lds.push_back(BaseLd);
17104 Align Align = BaseLd->getAlign();
17105 for (SDValue Op : N->ops().drop_front()) {
17106 auto *Ld = dyn_cast<LoadSDNode>(Op);
17107 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
17108 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
17109 Ld->getValueType(0) != BaseLdVT)
17110 return SDValue();
17111
17112 Lds.push_back(Ld);
17113
17114 // The common alignment is the most restrictive (smallest) of all the loads
17115 Align = std::min(Align, Ld->getAlign());
17116 }
17117
17118 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
17119 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
17120 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
17121 // If the load ptrs can be decomposed into a common (Base + Index) with a
17122 // common constant stride, then return the constant stride.
17123 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
17124 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
17125 if (BIO1.equalBaseIndex(BIO2, DAG))
17126 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
17127
17128 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
17129 SDValue P1 = Ld1->getBasePtr();
17130 SDValue P2 = Ld2->getBasePtr();
17131 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
17132 return {{P2.getOperand(1), false}};
17133 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
17134 return {{P1.getOperand(1), true}};
17135
17136 return std::nullopt;
17137 };
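  // For example, loads at p and p+8 give {8, false}; if instead the first
  // load's pointer is (add <second load's pointer>, 8), the result is
  // {8, true} and the stride is negated below.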
17138
17139 // Get the distance between the first and second loads
17140 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
17141 if (!BaseDiff)
17142 return SDValue();
17143
17144 // Check all the loads are the same distance apart
17145 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
17146 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
17147 return SDValue();
17148
17149 // TODO: At this point, we've successfully matched a generalized gather
17150 // load. Maybe we should emit that, and then move the specialized
17151 // matchers above and below into a DAG combine?
17152
17153 // Get the widened scalar type, e.g. v4i8 -> i64
17154 unsigned WideScalarBitWidth =
17155 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
17156 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
17157
17158 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
17159 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
17160 if (!TLI.isTypeLegal(WideVecVT))
17161 return SDValue();
17162
17163 // Check that the operation is legal
17164 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
17165 return SDValue();
17166
17167 auto [StrideVariant, MustNegateStride] = *BaseDiff;
17168 SDValue Stride =
17169 std::holds_alternative<SDValue>(StrideVariant)
17170 ? std::get<SDValue>(StrideVariant)
17171 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
17172 Lds[0]->getOffset().getValueType());
17173 if (MustNegateStride)
17174 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
17175
17176 SDValue AllOneMask =
17177 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
17178 DAG.getConstant(1, DL, MVT::i1));
17179
17180 uint64_t MemSize;
17181 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
17182 ConstStride && ConstStride->getSExtValue() >= 0)
17183 // total size = (elsize * n) + (stride - elsize) * (n-1)
17184 // = elsize + stride * (n-1)
17185 MemSize = WideScalarVT.getSizeInBits() +
17186 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
17187 else
17188 // If Stride isn't constant, then we can't know how much it will load
17189    MemSize = MemoryLocation::UnknownSize;
17190
17191  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
17192 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
17193 Align);
17194
17195 SDValue StridedLoad = DAG.getStridedLoadVP(
17196 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
17197 AllOneMask,
17198 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
17199
17200 for (SDValue Ld : N->ops())
17201 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
17202
17203 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
17204}
17205
17206static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
17207                                            const RISCVSubtarget &Subtarget,
17208 const RISCVTargetLowering &TLI) {
17209 SDLoc DL(N);
17210 EVT VT = N->getValueType(0);
17211 const unsigned ElementSize = VT.getScalarSizeInBits();
17212 const unsigned NumElts = VT.getVectorNumElements();
17213 SDValue V1 = N->getOperand(0);
17214 SDValue V2 = N->getOperand(1);
17215 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
17216 MVT XLenVT = Subtarget.getXLenVT();
17217
17218  // Recognize a disguised select of add/sub.
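  // That is, a shuffle selecting lanewise between (add A, B) and (sub A, B)
  // is rewritten as (add A, (vselect CC, -B, B)), which can lower to a masked
  // vrsub.vi feeding a single vadd.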
17219 bool SwapCC;
17220 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
17221 matchSelectAddSub(V1, V2, SwapCC)) {
17222 SDValue Sub = SwapCC ? V1 : V2;
17223 SDValue A = Sub.getOperand(0);
17224 SDValue B = Sub.getOperand(1);
17225
17226 SmallVector<SDValue> MaskVals;
17227 for (int MaskIndex : Mask) {
17228 bool SelectMaskVal = (MaskIndex < (int)NumElts);
17229 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
17230 }
17231 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
17232 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
17233 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
17234
17235 // Arrange the select such that we can match a masked
17236 // vrsub.vi to perform the conditional negate
17237 SDValue NegB = DAG.getNegative(B, DL, VT);
17238 if (!SwapCC)
17239 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
17240 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
17241 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
17242 }
17243
17244 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
17245 // during the combine phase before type legalization, and relies on
17246 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
17247 // for the source mask.
17248 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
17249 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
17250 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
17251 return SDValue();
17252
17253 SmallVector<int, 8> NewMask;
17254 narrowShuffleMaskElts(2, Mask, NewMask);
17255
17256 LLVMContext &C = *DAG.getContext();
17257 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
17258 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
17259 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
17260 DAG.getBitcast(NewVT, V2), NewMask);
17261 return DAG.getBitcast(VT, Res);
17262}
17263
17264static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
17265                               const RISCVSubtarget &Subtarget) {
17266
17267 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
17268
17269 if (N->getValueType(0).isFixedLengthVector())
17270 return SDValue();
17271
17272 SDValue Addend = N->getOperand(0);
17273 SDValue MulOp = N->getOperand(1);
17274
17275 if (N->getOpcode() == RISCVISD::ADD_VL) {
17276 SDValue AddPassthruOp = N->getOperand(2);
17277 if (!AddPassthruOp.isUndef())
17278 return SDValue();
17279 }
17280
17281 auto IsVWMulOpc = [](unsigned Opc) {
17282 switch (Opc) {
17283 case RISCVISD::VWMUL_VL:
17284  case RISCVISD::VWMULU_VL:
17285  case RISCVISD::VWMULSU_VL:
17286    return true;
17287 default:
17288 return false;
17289 }
17290 };
17291
17292 if (!IsVWMulOpc(MulOp.getOpcode()))
17293 std::swap(Addend, MulOp);
17294
17295 if (!IsVWMulOpc(MulOp.getOpcode()))
17296 return SDValue();
17297
17298 SDValue MulPassthruOp = MulOp.getOperand(2);
17299
17300 if (!MulPassthruOp.isUndef())
17301 return SDValue();
17302
17303 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
17304 const RISCVSubtarget &Subtarget) {
17305 if (N->getOpcode() == ISD::ADD) {
17306 SDLoc DL(N);
17307 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
17308 Subtarget);
17309 }
17310 return std::make_pair(N->getOperand(3), N->getOperand(4));
17311 }(N, DAG, Subtarget);
17312
17313 SDValue MulMask = MulOp.getOperand(3);
17314 SDValue MulVL = MulOp.getOperand(4);
17315
17316 if (AddMask != MulMask || AddVL != MulVL)
17317 return SDValue();
17318
17319 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
17320 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
17321 "Unexpected opcode after VWMACC_VL");
17322 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
17323 "Unexpected opcode after VWMACC_VL!");
17324 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
17325 "Unexpected opcode after VWMUL_VL!");
17326 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
17327 "Unexpected opcode after VWMUL_VL!");
17328
17329 SDLoc DL(N);
17330 EVT VT = N->getValueType(0);
17331 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
17332 AddVL};
17333 return DAG.getNode(Opc, DL, VT, Ops);
17334}
17335
17336static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
17337                                           ISD::MemIndexType &IndexType,
17338                                           RISCVTargetLowering::DAGCombinerInfo &DCI) {
17339 if (!DCI.isBeforeLegalize())
17340 return false;
17341
17342 SelectionDAG &DAG = DCI.DAG;
17343 const MVT XLenVT =
17344 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
17345
17346 const EVT IndexVT = Index.getValueType();
17347
17348 // RISC-V indexed loads only support the "unsigned unscaled" addressing
17349 // mode, so anything else must be manually legalized.
17350 if (!isIndexTypeSigned(IndexType))
17351 return false;
17352
17353 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
17354 // Any index legalization should first promote to XLenVT, so we don't lose
17355 // bits when scaling. This may create an illegal index type so we let
17356 // LLVM's legalization take care of the splitting.
17357 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
17358 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
17359 IndexVT.changeVectorElementType(XLenVT), Index);
17360 }
17361 IndexType = ISD::UNSIGNED_SCALED;
17362 return true;
17363}
17364
17365/// Match the index vector of a scatter or gather node as the shuffle mask
17366/// which performs the rearrangement if possible. Will only match if
17367/// all lanes are touched, and thus replacing the scatter or gather with
17368/// a unit strided access and shuffle is legal.
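/// For example, with 4-byte elements, an index build_vector of <4, 0, 12, 8>
/// yields the shuffle mask <1, 0, 3, 2> and touches every lane.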
17369static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
17370 SmallVector<int> &ShuffleMask) {
17371 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17372 return false;
17373 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17374 return false;
17375
17376 const unsigned ElementSize = VT.getScalarStoreSize();
17377 const unsigned NumElems = VT.getVectorNumElements();
17378
17379 // Create the shuffle mask and check all bits active
17380 assert(ShuffleMask.empty());
17381 BitVector ActiveLanes(NumElems);
17382 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17383 // TODO: We've found an active bit of UB, and could be
17384 // more aggressive here if desired.
17385 if (Index->getOperand(i)->isUndef())
17386 return false;
17387 uint64_t C = Index->getConstantOperandVal(i);
17388 if (C % ElementSize != 0)
17389 return false;
17390 C = C / ElementSize;
17391 if (C >= NumElems)
17392 return false;
17393 ShuffleMask.push_back(C);
17394 ActiveLanes.set(C);
17395 }
17396 return ActiveLanes.all();
17397}
17398
17399/// Match the index of a gather or scatter operation as an operation
17400/// with twice the element width and half the number of elements. This is
17401/// generally profitable (if legal) because these operations are linear
17402/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
17403/// come out ahead.
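/// For example, an i32 gather with indices <0, 4, 16, 20> can be treated as
/// an i64 gather with indices <0, 16>: half the elements at twice the width.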
17404static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
17405 Align BaseAlign, const RISCVSubtarget &ST) {
17406 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17407 return false;
17408 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17409 return false;
17410
17411  // Attempt a doubling. If we can use an element type 4x or 8x in
17412  // size, this will happen via multiple iterations of the transform.
17413 const unsigned NumElems = VT.getVectorNumElements();
17414 if (NumElems % 2 != 0)
17415 return false;
17416
17417 const unsigned ElementSize = VT.getScalarStoreSize();
17418 const unsigned WiderElementSize = ElementSize * 2;
17419 if (WiderElementSize > ST.getELen()/8)
17420 return false;
17421
17422 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17423 return false;
17424
17425 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17426 // TODO: We've found an active bit of UB, and could be
17427 // more aggressive here if desired.
17428 if (Index->getOperand(i)->isUndef())
17429 return false;
17430 // TODO: This offset check is too strict if we support fully
17431 // misaligned memory operations.
17432 uint64_t C = Index->getConstantOperandVal(i);
17433 if (i % 2 == 0) {
17434 if (C % WiderElementSize != 0)
17435 return false;
17436 continue;
17437 }
17438 uint64_t Last = Index->getConstantOperandVal(i-1);
17439 if (C != Last + ElementSize)
17440 return false;
17441 }
17442 return true;
17443}
17444
17445// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
17446// This would benefit the cases where X and Y are both the same value
17447// type of low-precision vectors. Since the truncate would be lowered into
17448// n levels of TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
17449// restriction, such pattern would be expanded into a series of "vsetvli"
17450// and "vnsrl" instructions later to reach this point.
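// For example, for v4i8 X and Y this rewrites
// trunc (sra (sext X to v4i32), (zext Y to v4i32)) into sra (X, smin (Y, 7)).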
17451static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
17452  SDValue Mask = N->getOperand(1);
17453 SDValue VL = N->getOperand(2);
17454
17455 bool IsVLMAX = isAllOnesConstant(VL) ||
17456 (isa<RegisterSDNode>(VL) &&
17457 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17458 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
17459 Mask.getOperand(0) != VL)
17460 return SDValue();
17461
17462 auto IsTruncNode = [&](SDValue V) {
17463 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17464 V.getOperand(1) == Mask && V.getOperand(2) == VL;
17465 };
17466
17467 SDValue Op = N->getOperand(0);
17468
17469 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
17470 // to distinguish such pattern.
17471 while (IsTruncNode(Op)) {
17472 if (!Op.hasOneUse())
17473 return SDValue();
17474 Op = Op.getOperand(0);
17475 }
17476
17477 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
17478 return SDValue();
17479
17480 SDValue N0 = Op.getOperand(0);
17481 SDValue N1 = Op.getOperand(1);
17482 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
17483 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
17484 return SDValue();
17485
17486 SDValue N00 = N0.getOperand(0);
17487 SDValue N10 = N1.getOperand(0);
17488 if (!N00.getValueType().isVector() ||
17489 N00.getValueType() != N10.getValueType() ||
17490 N->getValueType(0) != N10.getValueType())
17491 return SDValue();
17492
17493 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
17494 SDValue SMin =
17495 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
17496 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
17497 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
17498}
17499
17500// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
17501// maximum value for the truncated type.
17502// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
17503// is the signed maximum value for the truncated type and C2 is the signed
17504// minimum value.
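// For example, when truncating i16 elements to i8 elements, C is 255 for the
// unsigned case, and C1 is 127 with C2 being -128 for the signed case.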
17505static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
17506                                    const RISCVSubtarget &Subtarget) {
17507 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
17508
17509 MVT VT = N->getSimpleValueType(0);
17510
17511 SDValue Mask = N->getOperand(1);
17512 SDValue VL = N->getOperand(2);
17513
17514 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
17515 APInt &SplatVal) {
17516 if (V.getOpcode() != Opc &&
17517 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17518 V.getOperand(3) == Mask && V.getOperand(4) == VL))
17519 return SDValue();
17520
17521 SDValue Op = V.getOperand(1);
17522
17523 // Peek through conversion between fixed and scalable vectors.
17524 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
17525 isNullConstant(Op.getOperand(2)) &&
17526 Op.getOperand(1).getValueType().isFixedLengthVector() &&
17527 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17528 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
17529 isNullConstant(Op.getOperand(1).getOperand(1)))
17530 Op = Op.getOperand(1).getOperand(0);
17531
17532 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
17533 return V.getOperand(0);
17534
17535 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
17536 Op.getOperand(2) == VL) {
17537 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
17538 SplatVal =
17539 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
17540 return V.getOperand(0);
17541 }
17542 }
17543
17544 return SDValue();
17545 };
17546
17547 SDLoc DL(N);
17548
17549 auto DetectUSatPattern = [&](SDValue V) {
17550 APInt LoC, HiC;
17551
17552 // Simple case, V is a UMIN.
17553 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
17554 if (HiC.isMask(VT.getScalarSizeInBits()))
17555 return UMinOp;
17556
17557 // If we have an SMAX that removes negative numbers first, then we can match
17558 // SMIN instead of UMIN.
17559 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17560 if (SDValue SMaxOp =
17561 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17562 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
17563 return SMinOp;
17564
17565 // If we have an SMIN before an SMAX and the SMAX constant is less than or
17566 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
17567 // first.
17568 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17569 if (SDValue SMinOp =
17570 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17571 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
17572 HiC.uge(LoC))
17573 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
17574 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
17575 Mask, VL);
17576
17577 return SDValue();
17578 };
17579
17580 auto DetectSSatPattern = [&](SDValue V) {
17581 unsigned NumDstBits = VT.getScalarSizeInBits();
17582 unsigned NumSrcBits = V.getScalarValueSizeInBits();
17583 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
17584 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
17585
17586 APInt HiC, LoC;
17587 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17588 if (SDValue SMaxOp =
17589 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17590 if (HiC == SignedMax && LoC == SignedMin)
17591 return SMaxOp;
17592
17593 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17594 if (SDValue SMinOp =
17595 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17596 if (HiC == SignedMax && LoC == SignedMin)
17597 return SMinOp;
17598
17599 return SDValue();
17600 };
17601
17602 SDValue Src = N->getOperand(0);
17603
17604 // Look through multiple layers of truncates.
17605 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17606 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17607 Src.hasOneUse())
17608 Src = Src.getOperand(0);
17609
17610 SDValue Val;
17611 unsigned ClipOpc;
17612  if ((Val = DetectUSatPattern(Src)))
17613    ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
17614  else if ((Val = DetectSSatPattern(Src)))
17615    ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
17616  else
17617 return SDValue();
17618
17619 MVT ValVT = Val.getSimpleValueType();
17620
17621 do {
17622 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
17623 ValVT = ValVT.changeVectorElementType(ValEltVT);
17624 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
17625 } while (ValVT != VT);
17626
17627 return Val;
17628}
17629
17630// Convert
17631// (iX ctpop (bitcast (vXi1 A)))
17632// ->
17633// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
17634// FIXME: It's complicated to match all the variations of this after type
17635// legalization so we only handle the pre-type legalization pattern, but that
17636// requires the fixed vector type to be legal.
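// For example (illustrative), (i32 ctpop (bitcast (v16i1 A))) becomes a
// vcpop.m on the scalable container of v16i1, zero-extended back to i32.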
17637static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
17638                                         const RISCVSubtarget &Subtarget) {
17639 EVT VT = N->getValueType(0);
17640 if (!VT.isScalarInteger())
17641 return SDValue();
17642
17643 SDValue Src = N->getOperand(0);
17644
17645 // Peek through zero_extend. It doesn't change the count.
17646 if (Src.getOpcode() == ISD::ZERO_EXTEND)
17647 Src = Src.getOperand(0);
17648
17649 if (Src.getOpcode() != ISD::BITCAST)
17650 return SDValue();
17651
17652 Src = Src.getOperand(0);
17653 EVT SrcEVT = Src.getValueType();
17654 if (!SrcEVT.isSimple())
17655 return SDValue();
17656
17657 MVT SrcMVT = SrcEVT.getSimpleVT();
17658 // Make sure the input is an i1 vector.
17659 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
17660 return SDValue();
17661
17662 if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
17663 return SDValue();
17664
17665 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
17666 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
17667
17668 SDLoc DL(N);
17669 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
17670
17671 MVT XLenVT = Subtarget.getXLenVT();
17672 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
17673 return DAG.getZExtOrTrunc(Pop, DL, VT);
17674}
17675
17676SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
17677                                               DAGCombinerInfo &DCI) const {
17678 SelectionDAG &DAG = DCI.DAG;
17679 const MVT XLenVT = Subtarget.getXLenVT();
17680 SDLoc DL(N);
17681
17682 // Helper to call SimplifyDemandedBits on an operand of N where only some low
17683 // bits are demanded. N will be added to the Worklist if it was not deleted.
17684 // Caller should return SDValue(N, 0) if this returns true.
17685 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
17686 SDValue Op = N->getOperand(OpNo);
17687 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
17688 if (!SimplifyDemandedBits(Op, Mask, DCI))
17689 return false;
17690
17691 if (N->getOpcode() != ISD::DELETED_NODE)
17692 DCI.AddToWorklist(N);
17693 return true;
17694 };
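  // For example, the *W nodes handled below only read the low 32 bits of
  // their first operand and, for the shifts/rotates, the low 5 bits of their
  // second operand, so higher bits can be simplified away.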
17695
17696 switch (N->getOpcode()) {
17697 default:
17698 break;
17699 case RISCVISD::SplitF64: {
17700 SDValue Op0 = N->getOperand(0);
17701 // If the input to SplitF64 is just BuildPairF64 then the operation is
17702 // redundant. Instead, use BuildPairF64's operands directly.
17703 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
17704 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
17705
17706 if (Op0->isUndef()) {
17707 SDValue Lo = DAG.getUNDEF(MVT::i32);
17708 SDValue Hi = DAG.getUNDEF(MVT::i32);
17709 return DCI.CombineTo(N, Lo, Hi);
17710 }
17711
17712 // It's cheaper to materialise two 32-bit integers than to load a double
17713 // from the constant pool and transfer it to integer registers through the
17714 // stack.
17715 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
17716 APInt V = C->getValueAPF().bitcastToAPInt();
17717 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
17718 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
17719 return DCI.CombineTo(N, Lo, Hi);
17720 }
17721
17722 // This is a target-specific version of a DAGCombine performed in
17723 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17724 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17725 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17726 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17727 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
17728 break;
17729 SDValue NewSplitF64 =
17730 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
17731 Op0.getOperand(0));
17732 SDValue Lo = NewSplitF64.getValue(0);
17733 SDValue Hi = NewSplitF64.getValue(1);
17734 APInt SignBit = APInt::getSignMask(32);
17735 if (Op0.getOpcode() == ISD::FNEG) {
17736 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
17737 DAG.getConstant(SignBit, DL, MVT::i32));
17738 return DCI.CombineTo(N, Lo, NewHi);
17739 }
17740 assert(Op0.getOpcode() == ISD::FABS);
17741 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
17742 DAG.getConstant(~SignBit, DL, MVT::i32));
17743 return DCI.CombineTo(N, Lo, NewHi);
17744 }
17745 case RISCVISD::SLLW:
17746 case RISCVISD::SRAW:
17747 case RISCVISD::SRLW:
17748 case RISCVISD::RORW:
17749 case RISCVISD::ROLW: {
17750 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
17751 if (SimplifyDemandedLowBitsHelper(0, 32) ||
17752 SimplifyDemandedLowBitsHelper(1, 5))
17753 return SDValue(N, 0);
17754
17755 break;
17756 }
17757 case RISCVISD::CLZW:
17758 case RISCVISD::CTZW: {
17759 // Only the lower 32 bits of the first operand are read
17760 if (SimplifyDemandedLowBitsHelper(0, 32))
17761 return SDValue(N, 0);
17762 break;
17763 }
17764  case RISCVISD::FMV_W_X_RV64: {
17765    // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
17766 // conversion is unnecessary and can be replaced with the
17767 // FMV_X_ANYEXTW_RV64 operand.
17768 SDValue Op0 = N->getOperand(0);
17769    if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
17770      return Op0.getOperand(0);
17771 break;
17772 }
17773  case RISCVISD::FMV_X_ANYEXTH:
17774  case RISCVISD::FMV_X_ANYEXTW_RV64: {
17775    SDLoc DL(N);
17776 SDValue Op0 = N->getOperand(0);
17777 MVT VT = N->getSimpleValueType(0);
17778
17779 // Constant fold.
17780 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17781 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
17782 return DAG.getConstant(Val, DL, VT);
17783 }
17784
17785 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
17786 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
17787 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
17788 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
17789 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
17790 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
17791 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
17792 assert(Op0.getOperand(0).getValueType() == VT &&
17793 "Unexpected value type!");
17794 return Op0.getOperand(0);
17795 }
17796
17797 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
17798 cast<LoadSDNode>(Op0)->isSimple()) {
17799      MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
17800      auto *LN0 = cast<LoadSDNode>(Op0);
17801 SDValue Load =
17802 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
17803 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17804 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
17805 return Load;
17806 }
17807
17808 // This is a target-specific version of a DAGCombine performed in
17809 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17810 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17811 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17812 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17813 !Op0.getNode()->hasOneUse())
17814 break;
17815 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
17816 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
17817 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
17818 if (Op0.getOpcode() == ISD::FNEG)
17819 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
17820 DAG.getConstant(SignBit, DL, VT));
17821
17822 assert(Op0.getOpcode() == ISD::FABS);
17823 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
17824 DAG.getConstant(~SignBit, DL, VT));
17825 }
17826 case ISD::ABS: {
17827 EVT VT = N->getValueType(0);
17828 SDValue N0 = N->getOperand(0);
17829 // abs (sext) -> zext (abs)
17830 // abs (zext) -> zext (handled elsewhere)
17831 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
17832 SDValue Src = N0.getOperand(0);
17833 SDLoc DL(N);
17834 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
17835 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
17836 }
17837 break;
17838 }
17839 case ISD::ADD: {
17840 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17841 return V;
17842 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
17843 return V;
17844 return performADDCombine(N, DCI, Subtarget);
17845 }
17846 case ISD::SUB: {
17847 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17848 return V;
17849 return performSUBCombine(N, DAG, Subtarget);
17850 }
17851 case ISD::AND:
17852 return performANDCombine(N, DCI, Subtarget);
17853 case ISD::OR: {
17854 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17855 return V;
17856 return performORCombine(N, DCI, Subtarget);
17857 }
17858 case ISD::XOR:
17859 return performXORCombine(N, DAG, Subtarget);
17860 case ISD::MUL:
17861 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17862 return V;
17863 return performMULCombine(N, DAG, DCI, Subtarget);
17864 case ISD::SDIV:
17865 case ISD::UDIV:
17866 case ISD::SREM:
17867 case ISD::UREM:
17868 if (SDValue V = combineBinOpOfZExt(N, DAG))
17869 return V;
17870 break;
17871 case ISD::FMUL: {
17872 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
17873 SDValue N0 = N->getOperand(0);
17874 SDValue N1 = N->getOperand(1);
17875 if (N0->getOpcode() != ISD::FCOPYSIGN)
17876 std::swap(N0, N1);
17877 if (N0->getOpcode() != ISD::FCOPYSIGN)
17878 return SDValue();
17879 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
17880 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
17881 return SDValue();
17882 EVT VT = N->getValueType(0);
17883 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
17884 return SDValue();
17885 SDValue Sign = N0->getOperand(1);
17886 if (Sign.getValueType() != VT)
17887 return SDValue();
17888 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
17889 }
17890 case ISD::FADD:
17891 case ISD::UMAX:
17892 case ISD::UMIN:
17893 case ISD::SMAX:
17894 case ISD::SMIN:
17895 case ISD::FMAXNUM:
17896 case ISD::FMINNUM: {
17897 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17898 return V;
17899 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17900 return V;
17901 return SDValue();
17902 }
17903 case ISD::SETCC:
17904 return performSETCCCombine(N, DAG, Subtarget);
17905  case ISD::SIGN_EXTEND_INREG:
17906    return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
17907 case ISD::ZERO_EXTEND:
17908 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
17909 // type legalization. This is safe because fp_to_uint produces poison if
17910 // it overflows.
17911 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
17912 SDValue Src = N->getOperand(0);
17913 if (Src.getOpcode() == ISD::FP_TO_UINT &&
17914 isTypeLegal(Src.getOperand(0).getValueType()))
17915 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
17916 Src.getOperand(0));
17917 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
17918 isTypeLegal(Src.getOperand(1).getValueType())) {
17919 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
17920 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
17921 Src.getOperand(0), Src.getOperand(1));
17922 DCI.CombineTo(N, Res);
17923 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
17924 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
17925 return SDValue(N, 0); // Return N so it doesn't get rechecked.
17926 }
17927 }
17928 return SDValue();
17929  case RISCVISD::TRUNCATE_VECTOR_VL:
17930    if (SDValue V = combineTruncOfSraSext(N, DAG))
17931 return V;
17932 return combineTruncToVnclip(N, DAG, Subtarget);
17933 case ISD::TRUNCATE:
17934 return performTRUNCATECombine(N, DAG, Subtarget);
17935 case ISD::SELECT:
17936 return performSELECTCombine(N, DAG, Subtarget);
17937 case ISD::VSELECT:
17938 return performVSELECTCombine(N, DAG);
17939  case RISCVISD::CZERO_EQZ:
17940  case RISCVISD::CZERO_NEZ: {
17941 SDValue Val = N->getOperand(0);
17942 SDValue Cond = N->getOperand(1);
17943
17944 unsigned Opc = N->getOpcode();
17945
17946 // czero_eqz x, x -> x
17947 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
17948 return Val;
17949
17950 unsigned InvOpc =
17951        Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
17952
17953 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
17954 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
17955 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
17956 SDValue NewCond = Cond.getOperand(0);
17957 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
17958 if (DAG.MaskedValueIsZero(NewCond, Mask))
17959 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
17960 }
17961 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
17962 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
17963 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
17964 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
17965 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
17966 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17967 if (ISD::isIntEqualitySetCC(CCVal))
17968 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
17969 N->getValueType(0), Val, Cond.getOperand(0));
17970 }
17971 return SDValue();
17972 }
17973 case RISCVISD::SELECT_CC: {
17974 // Transform
17975 SDValue LHS = N->getOperand(0);
17976 SDValue RHS = N->getOperand(1);
17977 SDValue CC = N->getOperand(2);
17978 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
17979 SDValue TrueV = N->getOperand(3);
17980 SDValue FalseV = N->getOperand(4);
17981 SDLoc DL(N);
17982 EVT VT = N->getValueType(0);
17983
17984 // If the True and False values are the same, we don't need a select_cc.
17985 if (TrueV == FalseV)
17986 return TrueV;
17987
17988 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
17989 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
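    // For example, with y = 5 and z = 3: (x >> (XLEN - 1)) is all-ones when
    // x < 0 and zero otherwise, so the AND keeps (5 - 3) only when x < 0 and
    // the final ADD yields 5 or 3 accordingly.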
17990 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17991 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
17992 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
17993 if (CCVal == ISD::CondCode::SETGE)
17994 std::swap(TrueV, FalseV);
17995
17996 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17997 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17998      // Only handle simm12; if it is not in this range, it can be considered
17999      // a register.
18000 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
18001 isInt<12>(TrueSImm - FalseSImm)) {
18002 SDValue SRA =
18003 DAG.getNode(ISD::SRA, DL, VT, LHS,
18004 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
18005 SDValue AND =
18006 DAG.getNode(ISD::AND, DL, VT, SRA,
18007 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
18008 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
18009 }
18010
18011 if (CCVal == ISD::CondCode::SETGE)
18012 std::swap(TrueV, FalseV);
18013 }
18014
18015 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
18016 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
18017 {LHS, RHS, CC, TrueV, FalseV});
18018
18019 if (!Subtarget.hasConditionalMoveFusion()) {
18020 // (select c, -1, y) -> -c | y
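      // e.g. when c is true, -c is all-ones and the OR gives -1; when c is
      // false, -c is zero and the OR gives y.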
18021 if (isAllOnesConstant(TrueV)) {
18022 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
18023 SDValue Neg = DAG.getNegative(C, DL, VT);
18024 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
18025 }
18026 // (select c, y, -1) -> -!c | y
18027 if (isAllOnesConstant(FalseV)) {
18028 SDValue C =
18029 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
18030 SDValue Neg = DAG.getNegative(C, DL, VT);
18031 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
18032 }
18033
18034 // (select c, 0, y) -> -!c & y
18035 if (isNullConstant(TrueV)) {
18036 SDValue C =
18037 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
18038 SDValue Neg = DAG.getNegative(C, DL, VT);
18039 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
18040 }
18041 // (select c, y, 0) -> -c & y
18042 if (isNullConstant(FalseV)) {
18043 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
18044 SDValue Neg = DAG.getNegative(C, DL, VT);
18045 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
18046 }
18047 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
18048 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
18049 if (((isOneConstant(FalseV) && LHS == TrueV &&
18050 CCVal == ISD::CondCode::SETNE) ||
18051 (isOneConstant(TrueV) && LHS == FalseV &&
18052 CCVal == ISD::CondCode::SETEQ)) &&
18053          isNullConstant(RHS)) {
18054        // freeze it to be safe.
18055        LHS = DAG.getFreeze(LHS);
18056        SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
18057        return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
18058 }
18059 }
18060
18061 // If both true/false are an xor with 1, pull through the select.
18062 // This can occur after op legalization if both operands are setccs that
18063 // require an xor to invert.
18064 // FIXME: Generalize to other binary ops with identical operand?
18065 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
18066 TrueV.getOperand(1) == FalseV.getOperand(1) &&
18067 isOneConstant(TrueV.getOperand(1)) &&
18068 TrueV.hasOneUse() && FalseV.hasOneUse()) {
18069 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
18070 TrueV.getOperand(0), FalseV.getOperand(0));
18071 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
18072 }
18073
18074 return SDValue();
18075 }
18076 case RISCVISD::BR_CC: {
18077 SDValue LHS = N->getOperand(1);
18078 SDValue RHS = N->getOperand(2);
18079 SDValue CC = N->getOperand(3);
18080 SDLoc DL(N);
18081
18082 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
18083 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
18084 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
18085
18086 return SDValue();
18087 }
18088 case ISD::BITREVERSE:
18089 return performBITREVERSECombine(N, DAG, Subtarget);
18090 case ISD::FP_TO_SINT:
18091 case ISD::FP_TO_UINT:
18092 return performFP_TO_INTCombine(N, DCI, Subtarget);
18093  case ISD::FP_TO_SINT_SAT:
18094  case ISD::FP_TO_UINT_SAT:
18095    return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
18096 case ISD::FCOPYSIGN: {
18097 EVT VT = N->getValueType(0);
18098 if (!VT.isVector())
18099 break;
18100 // There is a form of VFSGNJ which injects the negated sign of its second
18101    // operand. Try to bubble any FNEG up after the extend/round to produce
18102    // this optimized pattern. Avoid modifying cases where the FP_ROUND has
18103    // TRUNC=1.
18104 SDValue In2 = N->getOperand(1);
18105 // Avoid cases where the extend/round has multiple uses, as duplicating
18106 // those is typically more expensive than removing a fneg.
18107 if (!In2.hasOneUse())
18108 break;
18109 if (In2.getOpcode() != ISD::FP_EXTEND &&
18110 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
18111 break;
18112 In2 = In2.getOperand(0);
18113 if (In2.getOpcode() != ISD::FNEG)
18114 break;
18115 SDLoc DL(N);
18116 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
18117 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
18118 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
18119 }
18120 case ISD::MGATHER: {
18121 const auto *MGN = cast<MaskedGatherSDNode>(N);
18122 const EVT VT = N->getValueType(0);
18123 SDValue Index = MGN->getIndex();
18124 SDValue ScaleOp = MGN->getScale();
18125 ISD::MemIndexType IndexType = MGN->getIndexType();
18126 assert(!MGN->isIndexScaled() &&
18127 "Scaled gather/scatter should not be formed");
18128
18129 SDLoc DL(N);
18130 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18131 return DAG.getMaskedGather(
18132 N->getVTList(), MGN->getMemoryVT(), DL,
18133 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18134 MGN->getBasePtr(), Index, ScaleOp},
18135 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18136
18137 if (narrowIndex(Index, IndexType, DAG))
18138 return DAG.getMaskedGather(
18139 N->getVTList(), MGN->getMemoryVT(), DL,
18140 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18141 MGN->getBasePtr(), Index, ScaleOp},
18142 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18143
18144 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
18145 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
18146 // The sequence will be XLenVT, not the type of Index. Tell
18147 // isSimpleVIDSequence this so we avoid overflow.
18148 if (std::optional<VIDSequence> SimpleVID =
18149 isSimpleVIDSequence(Index, Subtarget.getXLen());
18150 SimpleVID && SimpleVID->StepDenominator == 1) {
18151 const int64_t StepNumerator = SimpleVID->StepNumerator;
18152 const int64_t Addend = SimpleVID->Addend;
18153
18154 // Note: We don't need to check alignment here since (by assumption
18155        // from the existence of the gather), our offsets must be sufficiently
18156 // aligned.
18157
18158 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
18159 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
18160 assert(IndexType == ISD::UNSIGNED_SCALED);
18161 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
18162 DAG.getSignedConstant(Addend, DL, PtrVT));
18163
18164 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
18165                                          VT.getVectorElementCount());
18166        SDValue StridedLoad = DAG.getStridedLoadVP(
18167 VT, DL, MGN->getChain(), BasePtr,
18168 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
18169 EVL, MGN->getMemOperand());
18170 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
18171 StridedLoad, MGN->getPassThru(), EVL);
18172 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
18173 DL);
18174 }
18175 }
18176
18177 SmallVector<int> ShuffleMask;
18178 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18179 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
18180 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
18181 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
18182 MGN->getMask(), DAG.getUNDEF(VT),
18183 MGN->getMemoryVT(), MGN->getMemOperand(),
18184                                       ISD::UNINDEXED, ISD::NON_EXTLOAD);
18185      SDValue Shuffle =
18186 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
18187 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
18188 }
18189
18190 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18191 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
18192 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
18193 SmallVector<SDValue> NewIndices;
18194 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
18195 NewIndices.push_back(Index.getOperand(i));
18196 EVT IndexVT = Index.getValueType()
18197 .getHalfNumVectorElementsVT(*DAG.getContext());
18198 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
18199
18200 unsigned ElementSize = VT.getScalarStoreSize();
18201 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
18202 auto EltCnt = VT.getVectorElementCount();
18203 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
18204 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
18205 EltCnt.divideCoefficientBy(2));
18206 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
18207 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
18208 EltCnt.divideCoefficientBy(2));
18209 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
18210
18211 SDValue Gather =
18212 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
18213 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
18214 Index, ScaleOp},
18215 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
18216 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
18217 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
18218 }
18219 break;
18220 }
18221 case ISD::MSCATTER:{
18222 const auto *MSN = cast<MaskedScatterSDNode>(N);
18223 SDValue Index = MSN->getIndex();
18224 SDValue ScaleOp = MSN->getScale();
18225 ISD::MemIndexType IndexType = MSN->getIndexType();
18226 assert(!MSN->isIndexScaled() &&
18227 "Scaled gather/scatter should not be formed");
18228
18229 SDLoc DL(N);
18230 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18231 return DAG.getMaskedScatter(
18232 N->getVTList(), MSN->getMemoryVT(), DL,
18233 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18234 Index, ScaleOp},
18235 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18236
18237 if (narrowIndex(Index, IndexType, DAG))
18238 return DAG.getMaskedScatter(
18239 N->getVTList(), MSN->getMemoryVT(), DL,
18240 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18241 Index, ScaleOp},
18242 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18243
18244 EVT VT = MSN->getValue()->getValueType(0);
18245 SmallVector<int> ShuffleMask;
18246 if (!MSN->isTruncatingStore() &&
18247 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
18248 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
18249 DAG.getUNDEF(VT), ShuffleMask);
18250 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
18251 DAG.getUNDEF(XLenVT), MSN->getMask(),
18252 MSN->getMemoryVT(), MSN->getMemOperand(),
18253 ISD::UNINDEXED, false);
18254 }
18255 break;
18256 }
18257 case ISD::VP_GATHER: {
18258 const auto *VPGN = cast<VPGatherSDNode>(N);
18259 SDValue Index = VPGN->getIndex();
18260 SDValue ScaleOp = VPGN->getScale();
18261 ISD::MemIndexType IndexType = VPGN->getIndexType();
18262 assert(!VPGN->isIndexScaled() &&
18263 "Scaled gather/scatter should not be formed");
18264
18265 SDLoc DL(N);
18266 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18267 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18268 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18269 ScaleOp, VPGN->getMask(),
18270 VPGN->getVectorLength()},
18271 VPGN->getMemOperand(), IndexType);
18272
18273 if (narrowIndex(Index, IndexType, DAG))
18274 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18275 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18276 ScaleOp, VPGN->getMask(),
18277 VPGN->getVectorLength()},
18278 VPGN->getMemOperand(), IndexType);
18279
18280 break;
18281 }
18282 case ISD::VP_SCATTER: {
18283 const auto *VPSN = cast<VPScatterSDNode>(N);
18284 SDValue Index = VPSN->getIndex();
18285 SDValue ScaleOp = VPSN->getScale();
18286 ISD::MemIndexType IndexType = VPSN->getIndexType();
18287 assert(!VPSN->isIndexScaled() &&
18288 "Scaled gather/scatter should not be formed");
18289
18290 SDLoc DL(N);
18291 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18292 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18293 {VPSN->getChain(), VPSN->getValue(),
18294 VPSN->getBasePtr(), Index, ScaleOp,
18295 VPSN->getMask(), VPSN->getVectorLength()},
18296 VPSN->getMemOperand(), IndexType);
18297
18298 if (narrowIndex(Index, IndexType, DAG))
18299 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18300 {VPSN->getChain(), VPSN->getValue(),
18301 VPSN->getBasePtr(), Index, ScaleOp,
18302 VPSN->getMask(), VPSN->getVectorLength()},
18303 VPSN->getMemOperand(), IndexType);
18304 break;
18305 }
18306 case RISCVISD::SHL_VL:
18307 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18308 return V;
18309 [[fallthrough]];
18310 case RISCVISD::SRA_VL:
18311 case RISCVISD::SRL_VL: {
18312 SDValue ShAmt = N->getOperand(1);
18313    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18314      // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18315 SDLoc DL(N);
18316 SDValue VL = N->getOperand(4);
18317 EVT VT = N->getValueType(0);
18318 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18319 ShAmt.getOperand(1), VL);
18320 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
18321 N->getOperand(2), N->getOperand(3), N->getOperand(4));
18322 }
18323 break;
18324 }
18325 case ISD::SRA:
18326 if (SDValue V = performSRACombine(N, DAG, Subtarget))
18327 return V;
18328 [[fallthrough]];
18329 case ISD::SRL:
18330 case ISD::SHL: {
18331 if (N->getOpcode() == ISD::SHL) {
18332 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18333 return V;
18334 }
18335 SDValue ShAmt = N->getOperand(1);
18336    if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
18337      // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18338 SDLoc DL(N);
18339 EVT VT = N->getValueType(0);
18340 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18341 ShAmt.getOperand(1),
18342 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
18343 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
18344 }
18345 break;
18346 }
18347 case RISCVISD::ADD_VL:
18348 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18349 return V;
18350 return combineToVWMACC(N, DAG, Subtarget);
18351  case RISCVISD::VWADD_W_VL:
18352  case RISCVISD::VWADDU_W_VL:
18353  case RISCVISD::VWSUB_W_VL:
18354  case RISCVISD::VWSUBU_W_VL:
18355    return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
18356 case RISCVISD::SUB_VL:
18357 case RISCVISD::MUL_VL:
18358 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18359  case RISCVISD::VFMADD_VL:
18360  case RISCVISD::VFNMADD_VL:
18361  case RISCVISD::VFMSUB_VL:
18362  case RISCVISD::VFNMSUB_VL:
18363  case RISCVISD::STRICT_VFMADD_VL:
18364  case RISCVISD::STRICT_VFNMADD_VL:
18365  case RISCVISD::STRICT_VFMSUB_VL:
18366  case RISCVISD::STRICT_VFNMSUB_VL:
18367    return performVFMADD_VLCombine(N, DCI, Subtarget);
18368 case RISCVISD::FADD_VL:
18369 case RISCVISD::FSUB_VL:
18370 case RISCVISD::FMUL_VL:
18371  case RISCVISD::VFWADD_W_VL:
18372  case RISCVISD::VFWSUB_W_VL:
18373    return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18374 case ISD::LOAD:
18375 case ISD::STORE: {
18376 if (DCI.isAfterLegalizeDAG())
18377 if (SDValue V = performMemPairCombine(N, DCI))
18378 return V;
18379
18380 if (N->getOpcode() != ISD::STORE)
18381 break;
18382
18383 auto *Store = cast<StoreSDNode>(N);
18384 SDValue Chain = Store->getChain();
18385 EVT MemVT = Store->getMemoryVT();
18386 SDValue Val = Store->getValue();
18387 SDLoc DL(N);
18388
18389 bool IsScalarizable =
18390 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
18391 Store->isSimple() &&
18392 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
18393 isPowerOf2_64(MemVT.getSizeInBits()) &&
18394 MemVT.getSizeInBits() <= Subtarget.getXLen();
18395
18396 // If sufficiently aligned we can scalarize stores of constant vectors of
18397 // any power-of-two size up to XLen bits, provided that they aren't too
18398 // expensive to materialize.
18399 // vsetivli zero, 2, e8, m1, ta, ma
18400 // vmv.v.i v8, 4
18401 // vse64.v v8, (a0)
18402 // ->
18403 // li a1, 1028
18404 // sh a1, 0(a0)
18405 if (DCI.isBeforeLegalize() && IsScalarizable &&
18406        ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
18407      // Get the constant vector bits
18408 APInt NewC(Val.getValueSizeInBits(), 0);
18409 uint64_t EltSize = Val.getScalarValueSizeInBits();
18410 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
18411 if (Val.getOperand(i).isUndef())
18412 continue;
18413 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
18414 i * EltSize);
18415 }
18416 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18417
18418 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
18419 true) <= 2 &&
18420          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18421                                         NewVT, *Store->getMemOperand())) {
18422 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
18423 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
18424 Store->getPointerInfo(), Store->getOriginalAlign(),
18425 Store->getMemOperand()->getFlags());
18426 }
18427 }
18428
18429 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
18430 // vsetivli zero, 2, e16, m1, ta, ma
18431 // vle16.v v8, (a0)
18432 // vse16.v v8, (a1)
18433 if (auto *L = dyn_cast<LoadSDNode>(Val);
18434 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
18435 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18436 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
18437 L->getMemoryVT() == MemVT) {
18438 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18439      if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18440                                         NewVT, *Store->getMemOperand()) &&
18441          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18442                                         NewVT, *L->getMemOperand())) {
18443 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
18444 L->getPointerInfo(), L->getOriginalAlign(),
18445 L->getMemOperand()->getFlags());
18446 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
18447 Store->getPointerInfo(), Store->getOriginalAlign(),
18448 Store->getMemOperand()->getFlags());
18449 }
18450 }
18451
18452 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
18453 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
18454 // any illegal types.
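    // For example, a scalar store of (vmv.x.s v8) with an i32 element type
    // becomes a vse32.v of v8 with VL = 1, avoiding the vector-to-GPR move.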
18455 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
18456 (DCI.isAfterLegalizeDAG() &&
18457         Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18458         isNullConstant(Val.getOperand(1)))) {
18459 SDValue Src = Val.getOperand(0);
18460 MVT VecVT = Src.getSimpleValueType();
18461 // VecVT should be scalable and memory VT should match the element type.
18462 if (!Store->isIndexed() && VecVT.isScalableVector() &&
18463 MemVT == VecVT.getVectorElementType()) {
18464 SDLoc DL(N);
18465 MVT MaskVT = getMaskTypeFor(VecVT);
18466 return DAG.getStoreVP(
18467 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
18468 DAG.getConstant(1, DL, MaskVT),
18469 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
18470 Store->getMemOperand(), Store->getAddressingMode(),
18471 Store->isTruncatingStore(), /*IsCompress*/ false);
18472 }
18473 }
18474
18475 break;
18476 }
18477 case ISD::SPLAT_VECTOR: {
18478 EVT VT = N->getValueType(0);
18479 // Only perform this combine on legal MVT types.
18480 if (!isTypeLegal(VT))
18481 break;
18482 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
18483 DAG, Subtarget))
18484 return Gather;
18485 break;
18486 }
18487 case ISD::BUILD_VECTOR:
18488 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
18489 return V;
18490 break;
18491  case ISD::CONCAT_VECTORS:
18492    if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
18493 return V;
18494 break;
18495  case ISD::VECTOR_SHUFFLE:
18496    if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
18497 return V;
18498 break;
18499  case ISD::INSERT_VECTOR_ELT:
18500    if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
18501 return V;
18502 break;
18503 case RISCVISD::VFMV_V_F_VL: {
18504 const MVT VT = N->getSimpleValueType(0);
18505 SDValue Passthru = N->getOperand(0);
18506 SDValue Scalar = N->getOperand(1);
18507 SDValue VL = N->getOperand(2);
18508
18509 // If VL is 1, we can use vfmv.s.f.
18510 if (isOneConstant(VL))
18511 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
18512 break;
18513 }
18514 case RISCVISD::VMV_V_X_VL: {
18515 const MVT VT = N->getSimpleValueType(0);
18516 SDValue Passthru = N->getOperand(0);
18517 SDValue Scalar = N->getOperand(1);
18518 SDValue VL = N->getOperand(2);
18519
18520 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
18521 // scalar input.
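    // For example, splatting an i64 scalar into a vector of i8 elements only
    // needs the low 8 bits of that scalar.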
18522 unsigned ScalarSize = Scalar.getValueSizeInBits();
18523 unsigned EltWidth = VT.getScalarSizeInBits();
18524 if (ScalarSize > EltWidth && Passthru.isUndef())
18525 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18526 return SDValue(N, 0);
18527
18528 // If VL is 1 and the scalar value won't benefit from immediate, we can
18529 // use vmv.s.x.
18530 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18531 if (isOneConstant(VL) &&
18532 (!Const || Const->isZero() ||
18533 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18534 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
18535
18536 break;
18537 }
18538 case RISCVISD::VFMV_S_F_VL: {
18539 SDValue Src = N->getOperand(1);
18540 // Try to remove vector->scalar->vector if the scalar->vector is inserting
18541 // into an undef vector.
18542 // TODO: Could use a vslide or vmv.v.v for non-undef.
18543 if (N->getOperand(0).isUndef() &&
18544 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18545 isNullConstant(Src.getOperand(1)) &&
18546 Src.getOperand(0).getValueType().isScalableVector()) {
18547 EVT VT = N->getValueType(0);
18548 EVT SrcVT = Src.getOperand(0).getValueType();
18549      assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
18550      // Widths match, just return the original vector.
18551 if (SrcVT == VT)
18552 return Src.getOperand(0);
18553 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
18554 }
18555 [[fallthrough]];
18556 }
18557 case RISCVISD::VMV_S_X_VL: {
18558 const MVT VT = N->getSimpleValueType(0);
18559 SDValue Passthru = N->getOperand(0);
18560 SDValue Scalar = N->getOperand(1);
18561 SDValue VL = N->getOperand(2);
18562
18563 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
18564 Scalar.getOperand(0).getValueType() == N->getValueType(0))
18565 return Scalar.getOperand(0);
18566
18567 // Use M1 or smaller to avoid over constraining register allocation
18568 const MVT M1VT = getLMUL1VT(VT);
18569 if (M1VT.bitsLT(VT)) {
18570 SDValue M1Passthru =
18571 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
18572 DAG.getVectorIdxConstant(0, DL));
18573 SDValue Result =
18574 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
18575 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
18576 DAG.getVectorIdxConstant(0, DL));
18577 return Result;
18578 }
18579
18580 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
18581 // higher would involve overly constraining the register allocator for
18582 // no purpose.
18583 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18584 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18585 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
18586 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
18587
18588 break;
18589 }
18590 case RISCVISD::VMV_X_S: {
18591 SDValue Vec = N->getOperand(0);
18592 MVT VecVT = N->getOperand(0).getSimpleValueType();
18593 const MVT M1VT = getLMUL1VT(VecVT);
18594 if (M1VT.bitsLT(VecVT)) {
18595 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
18596 DAG.getVectorIdxConstant(0, DL));
18597 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
18598 }
18599 break;
18600 }
18601  case ISD::INTRINSIC_VOID:
18602  case ISD::INTRINSIC_W_CHAIN:
18603  case ISD::INTRINSIC_WO_CHAIN: {
18604    unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
18605 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
18606 switch (IntNo) {
18607 // By default we do not combine any intrinsic.
18608 default:
18609 return SDValue();
18610 case Intrinsic::riscv_vcpop:
18611 case Intrinsic::riscv_vcpop_mask:
18612 case Intrinsic::riscv_vfirst:
18613 case Intrinsic::riscv_vfirst_mask: {
18614 SDValue VL = N->getOperand(2);
18615 if (IntNo == Intrinsic::riscv_vcpop_mask ||
18616 IntNo == Intrinsic::riscv_vfirst_mask)
18617 VL = N->getOperand(3);
18618 if (!isNullConstant(VL))
18619 return SDValue();
18620 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
18621 SDLoc DL(N);
18622 EVT VT = N->getValueType(0);
18623 if (IntNo == Intrinsic::riscv_vfirst ||
18624 IntNo == Intrinsic::riscv_vfirst_mask)
18625 return DAG.getAllOnesConstant(DL, VT);
18626 return DAG.getConstant(0, DL, VT);
18627 }
18628 }
18629 }
18630 case ISD::EXPERIMENTAL_VP_REVERSE:
18631 return performVP_REVERSECombine(N, DAG, Subtarget);
18632 case ISD::VP_STORE:
18633 return performVP_STORECombine(N, DAG, Subtarget);
18634 case ISD::BITCAST: {
18636 SDValue N0 = N->getOperand(0);
18637 EVT VT = N->getValueType(0);
18638 EVT SrcVT = N0.getValueType();
18639 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
18640 unsigned NF = VT.getRISCVVectorTupleNumFields();
18641 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
18642 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
18643 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
18644
18645 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
18646
18647 SDValue Result = DAG.getUNDEF(VT);
18648 for (unsigned i = 0; i < NF; ++i)
18649 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
18650 DAG.getVectorIdxConstant(i, DL));
18651 return Result;
18652 }
18653 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
18654 // type, widen both sides to avoid a trip through memory.
18655 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18656 VT.isScalarInteger()) {
18657 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
18658 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
18659 Ops[0] = N0;
18660 SDLoc DL(N);
18661 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
18662 N0 = DAG.getBitcast(MVT::i8, N0);
18663 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
18664 }
18665
18666 return SDValue();
18667 }
18668 case ISD::CTPOP:
18669 if (SDValue V = combineScalarCTPOPToVCPOP(N, DAG, Subtarget))
18670 return V;
18671 break;
18672 }
18673
18674 return SDValue();
18675}
18676
18677 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
18678     EVT XVT, unsigned KeptBits) const {
18679   // For vectors, we don't have a preference.
18680 if (XVT.isVector())
18681 return false;
18682
18683 if (XVT != MVT::i32 && XVT != MVT::i64)
18684 return false;
18685
18686 // We can use sext.w for RV64 or an srai 31 on RV32.
18687 if (KeptBits == 32 || KeptBits == 64)
18688 return true;
18689
18690 // With Zbb we can use sext.h/sext.b.
18691 return Subtarget.hasStdExtZbb() &&
18692 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
18693 KeptBits == 16);
18694}
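// For illustration (a hedged sketch, register choices arbitrary): with Zbb, a
// "does this i64 fit in a signed 16-bit value" check can be selected as
//   sext.h a1, a0
//   xor    a1, a1, a0
//   seqz   a0, a1
// so keeping the sign_extend_inreg form of the signed-truncation-check pattern
// (KeptBits == 16) is profitable, which is why this hook returns true there.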
18695
18696 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
18697     const SDNode *N, CombineLevel Level) const {
18698 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
18699 N->getOpcode() == ISD::SRL) &&
18700 "Expected shift op");
18701
18702 // The following folds are only desirable if `(OP _, c1 << c2)` can be
18703 // materialised in fewer instructions than `(OP _, c1)`:
18704 //
18705 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
18706 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
18707 SDValue N0 = N->getOperand(0);
18708 EVT Ty = N0.getValueType();
18709
18710   // Loads and stores can fold a constant offset into their addressing mode, so
18711   // when AddNode is only used by loads/stores, the folding optimization above
18712   // is still worthwhile to perform.
18713 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
18714 for (SDNode *Use : X->users()) {
18715 // This use is the one we're on right now. Skip it
18716 if (Use == User || Use->getOpcode() == ISD::SELECT)
18717 continue;
18718 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
18719 return false;
18720 }
18721 return true;
18722 };
18723
18724 if (Ty.isScalarInteger() &&
18725 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
18726 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
18727 return isUsedByLdSt(N0.getNode(), N);
18728
18729 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18730 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18731
18732 // Bail if we might break a sh{1,2,3}add pattern.
18733 if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
18734 C2->getZExtValue() <= 3 && N->hasOneUse() &&
18735 N->user_begin()->getOpcode() == ISD::ADD &&
18736 !isUsedByLdSt(*N->user_begin(), nullptr) &&
18737 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
18738 return false;
18739
18740 if (C1 && C2) {
18741 const APInt &C1Int = C1->getAPIntValue();
18742 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18743
18744 // We can materialise `c1 << c2` into an add immediate, so it's "free",
18745 // and the combine should happen, to potentially allow further combines
18746 // later.
18747 if (ShiftedC1Int.getSignificantBits() <= 64 &&
18748 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
18749 return true;
18750
18751 // We can materialise `c1` in an add immediate, so it's "free", and the
18752 // combine should be prevented.
18753 if (C1Int.getSignificantBits() <= 64 &&
18754         isLegalAddImmediate(C1Int.getSExtValue()))
18755       return false;
18756
18757 // Neither constant will fit into an immediate, so find materialisation
18758 // costs.
18759 int C1Cost =
18760 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
18761 /*CompressionCost*/ true);
18762 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
18763 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
18764 /*CompressionCost*/ true);
18765
18766 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
18767 // combine should be prevented.
18768 if (C1Cost < ShiftedC1Cost)
18769 return false;
18770 }
18771 }
18772
18773 if (!N0->hasOneUse())
18774 return false;
18775
18776 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
18777 N0->getOperand(0)->getOpcode() == ISD::ADD &&
18778 !N0->getOperand(0)->hasOneUse())
18779 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
18780
18781 return true;
18782}
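// Worked example (illustrative): for (shl (add x, 1), 2), both c1 = 1 and
// c1 << c2 = 4 fit in an addi immediate; the ShiftedC1Int check above fires
// first, so the commute to (add (shl x, 2), 4) is allowed, keeping the
// constant "free" while exposing the shl to further combines. For
// (shl (add x, 2047), 4), c1 << c2 = 32752 no longer fits in a simm12 while
// c1 does, so the commute is rejected.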
18783
18784 bool RISCVTargetLowering::targetShrinkDemandedConstant(
18785     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
18786 TargetLoweringOpt &TLO) const {
18787 // Delay this optimization as late as possible.
18788 if (!TLO.LegalOps)
18789 return false;
18790
18791 EVT VT = Op.getValueType();
18792 if (VT.isVector())
18793 return false;
18794
18795 unsigned Opcode = Op.getOpcode();
18796 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
18797 return false;
18798
18799 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18800 if (!C)
18801 return false;
18802
18803 const APInt &Mask = C->getAPIntValue();
18804
18805 // Clear all non-demanded bits initially.
18806 APInt ShrunkMask = Mask & DemandedBits;
18807
18808 // Try to make a smaller immediate by setting undemanded bits.
18809
18810 APInt ExpandedMask = Mask | ~DemandedBits;
18811
18812 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
18813 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
18814 };
18815 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
18816 if (NewMask == Mask)
18817 return true;
18818 SDLoc DL(Op);
18819 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
18820 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
18821 Op.getOperand(0), NewC);
18822 return TLO.CombineTo(Op, NewOp);
18823 };
18824
18825 // If the shrunk mask fits in sign extended 12 bits, let the target
18826 // independent code apply it.
18827 if (ShrunkMask.isSignedIntN(12))
18828 return false;
18829
18830   // AND has a few special cases for zext.
18831 if (Opcode == ISD::AND) {
18832 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
18833 // otherwise use SLLI + SRLI.
18834 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
18835 if (IsLegalMask(NewMask))
18836 return UseMask(NewMask);
18837
18838 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
18839 if (VT == MVT::i64) {
18840 APInt NewMask = APInt(64, 0xffffffff);
18841 if (IsLegalMask(NewMask))
18842 return UseMask(NewMask);
18843 }
18844 }
18845
18846 // For the remaining optimizations, we need to be able to make a negative
18847 // number through a combination of mask and undemanded bits.
18848 if (!ExpandedMask.isNegative())
18849 return false;
18850
18851   // What is the fewest number of bits we need to represent the negative number?
18852 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
18853
18854 // Try to make a 12 bit negative immediate. If that fails try to make a 32
18855 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
18856 // If we can't create a simm12, we shouldn't change opaque constants.
18857 APInt NewMask = ShrunkMask;
18858 if (MinSignedBits <= 12)
18859 NewMask.setBitsFrom(11);
18860 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
18861 NewMask.setBitsFrom(31);
18862 else
18863 return false;
18864
18865 // Check that our new mask is a subset of the demanded mask.
18866 assert(IsLegalMask(NewMask));
18867 return UseMask(NewMask);
18868}
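// Worked example (illustrative): for (and x, 0x1ffff) with only the low 16
// bits demanded, ShrunkMask is 0xffff; the AND special case above rewrites the
// node to (and x, 0xffff), which selects to zext.h (Zbb) or an slli+srli pair
// instead of materialising 0x1ffff.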
18869
18870static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
18871 static const uint64_t GREVMasks[] = {
18872 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
18873 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
18874
18875 for (unsigned Stage = 0; Stage != 6; ++Stage) {
18876 unsigned Shift = 1 << Stage;
18877 if (ShAmt & Shift) {
18878 uint64_t Mask = GREVMasks[Stage];
18879 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
18880 if (IsGORC)
18881 Res |= x;
18882 x = Res;
18883 }
18884 }
18885
18886 return x;
18887}
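// For example, computeGREVOrGORC(0xf1, 7, /*IsGORC=*/false) bit-reverses each
// byte and yields 0x8f, matching brev8; with IsGORC=true any byte containing a
// set bit saturates to 0xff, matching orc.b. A shift amount of 7 applies the
// stages with shifts 1, 2 and 4, i.e. the full within-byte reverse/or-combine.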
18888
18889 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
18890                                                         KnownBits &Known,
18891 const APInt &DemandedElts,
18892 const SelectionDAG &DAG,
18893 unsigned Depth) const {
18894 unsigned BitWidth = Known.getBitWidth();
18895 unsigned Opc = Op.getOpcode();
18896 assert((Opc >= ISD::BUILTIN_OP_END ||
18897 Opc == ISD::INTRINSIC_WO_CHAIN ||
18898 Opc == ISD::INTRINSIC_W_CHAIN ||
18899 Opc == ISD::INTRINSIC_VOID) &&
18900 "Should use MaskedValueIsZero if you don't know whether Op"
18901 " is a target node!");
18902
18903 Known.resetAll();
18904 switch (Opc) {
18905 default: break;
18906 case RISCVISD::SELECT_CC: {
18907 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
18908 // If we don't know any bits, early out.
18909 if (Known.isUnknown())
18910 break;
18911 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
18912
18913 // Only known if known in both the LHS and RHS.
18914 Known = Known.intersectWith(Known2);
18915 break;
18916 }
18917   case RISCVISD::CZERO_EQZ:
18918   case RISCVISD::CZERO_NEZ:
18919     Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18920 // Result is either all zero or operand 0. We can propagate zeros, but not
18921 // ones.
18922 Known.One.clearAllBits();
18923 break;
18924 case RISCVISD::REMUW: {
18925 KnownBits Known2;
18926 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18927 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18928 // We only care about the lower 32 bits.
18929 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
18930 // Restore the original width by sign extending.
18931 Known = Known.sext(BitWidth);
18932 break;
18933 }
18934 case RISCVISD::DIVUW: {
18935 KnownBits Known2;
18936 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18937 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18938 // We only care about the lower 32 bits.
18939 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
18940 // Restore the original width by sign extending.
18941 Known = Known.sext(BitWidth);
18942 break;
18943 }
18944 case RISCVISD::SLLW: {
18945 KnownBits Known2;
18946 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18947 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18948 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
18949 // Restore the original width by sign extending.
18950 Known = Known.sext(BitWidth);
18951 break;
18952 }
18953 case RISCVISD::CTZW: {
18954 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18955 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
18956 unsigned LowBits = llvm::bit_width(PossibleTZ);
18957 Known.Zero.setBitsFrom(LowBits);
18958 break;
18959 }
18960 case RISCVISD::CLZW: {
18961 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18962 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
18963 unsigned LowBits = llvm::bit_width(PossibleLZ);
18964 Known.Zero.setBitsFrom(LowBits);
18965 break;
18966 }
18967 case RISCVISD::BREV8:
18968 case RISCVISD::ORC_B: {
18969 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
18970 // control value of 7 is equivalent to brev8 and orc.b.
18971 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18972 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
18973 // To compute zeros, we need to invert the value and invert it back after.
18974 Known.Zero =
18975 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
18976 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
18977 break;
18978 }
18979 case RISCVISD::READ_VLENB: {
18980 // We can use the minimum and maximum VLEN values to bound VLENB. We
18981 // know VLEN must be a power of two.
18982 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
18983 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
18984 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
18985 Known.Zero.setLowBits(Log2_32(MinVLenB));
18986 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
18987 if (MaxVLenB == MinVLenB)
18988 Known.One.setBit(Log2_32(MinVLenB));
18989 break;
18990 }
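  // Worked example (illustrative): with a guaranteed VLEN range of 128..512,
  // MinVLenB = 16 and MaxVLenB = 64, so bits [3:0] and all bits above bit 6 of
  // VLENB are known zero; if the range collapses to a single VLEN, the one
  // remaining bit is additionally known one.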
18991 case RISCVISD::FCLASS: {
18992 // fclass will only set one of the low 10 bits.
18993 Known.Zero.setBitsFrom(10);
18994 break;
18995 }
18996   case ISD::INTRINSIC_W_CHAIN:
18997   case ISD::INTRINSIC_WO_CHAIN: {
18998     unsigned IntNo =
18999 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
19000 switch (IntNo) {
19001 default:
19002 // We can't do anything for most intrinsics.
19003 break;
19004 case Intrinsic::riscv_vsetvli:
19005 case Intrinsic::riscv_vsetvlimax: {
19006 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
19007 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
19008 RISCVII::VLMUL VLMUL =
19009 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
19010 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
19011 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
19012 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
19013 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
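      // Worked example (illustrative): vsetvli with SEW=8 and LMUL=1/2 on a
      // subtarget whose maximum VLEN is 512 gives MaxVL = (512 / 8) / 2 = 32,
      // so the code below marks bits 6 and above of the result as known zero.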
19014
19015     // The result of vsetvli must not be larger than AVL.
19016 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
19017 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
19018
19019 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
19020 if (BitWidth > KnownZeroFirstBit)
19021 Known.Zero.setBitsFrom(KnownZeroFirstBit);
19022 break;
19023 }
19024 }
19025 break;
19026 }
19027 }
19028}
19029
19030 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
19031     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
19032 unsigned Depth) const {
19033 switch (Op.getOpcode()) {
19034 default:
19035 break;
19036 case RISCVISD::SELECT_CC: {
19037 unsigned Tmp =
19038 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
19039 if (Tmp == 1) return 1; // Early out.
19040 unsigned Tmp2 =
19041 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
19042 return std::min(Tmp, Tmp2);
19043 }
19044   case RISCVISD::CZERO_EQZ:
19045   case RISCVISD::CZERO_NEZ:
19046     // Output is either all zero or operand 0. We can propagate sign bit count
19047 // from operand 0.
19048 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
19049 case RISCVISD::ABSW: {
19050 // We expand this at isel to negw+max. The result will have 33 sign bits
19051 // if the input has at least 33 sign bits.
19052 unsigned Tmp =
19053 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
19054 if (Tmp < 33) return 1;
19055 return 33;
19056 }
19057 case RISCVISD::SLLW:
19058 case RISCVISD::SRAW:
19059 case RISCVISD::SRLW:
19060 case RISCVISD::DIVW:
19061 case RISCVISD::DIVUW:
19062 case RISCVISD::REMUW:
19063 case RISCVISD::ROLW:
19064 case RISCVISD::RORW:
19065   case RISCVISD::FCVT_W_RV64:
19066   case RISCVISD::FCVT_WU_RV64:
19067   case RISCVISD::STRICT_FCVT_W_RV64:
19068   case RISCVISD::STRICT_FCVT_WU_RV64:
19069     // TODO: As the result is sign-extended, this is conservatively correct. A
19070 // more precise answer could be calculated for SRAW depending on known
19071 // bits in the shift amount.
19072 return 33;
19073 case RISCVISD::VMV_X_S: {
19074 // The number of sign bits of the scalar result is computed by obtaining the
19075 // element type of the input vector operand, subtracting its width from the
19076 // XLEN, and then adding one (sign bit within the element type). If the
19077 // element type is wider than XLen, the least-significant XLEN bits are
19078 // taken.
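    // For example (illustrative), reading element 0 of an nxv4i8 vector on
    // RV64 gives XLen - EltBits + 1 = 64 - 8 + 1 = 57 known sign bits, since
    // vmv.x.s sign-extends the element to XLEN.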
19079 unsigned XLen = Subtarget.getXLen();
19080 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
19081 if (EltBits <= XLen)
19082 return XLen - EltBits + 1;
19083 break;
19084 }
19085   case ISD::INTRINSIC_W_CHAIN: {
19086     unsigned IntNo = Op.getConstantOperandVal(1);
19087 switch (IntNo) {
19088 default:
19089 break;
19090 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
19091 case Intrinsic::riscv_masked_atomicrmw_add_i64:
19092 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
19093 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
19094 case Intrinsic::riscv_masked_atomicrmw_max_i64:
19095 case Intrinsic::riscv_masked_atomicrmw_min_i64:
19096 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
19097 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
19098 case Intrinsic::riscv_masked_cmpxchg_i64:
19099 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
19100 // narrow atomic operation. These are implemented using atomic
19101 // operations at the minimum supported atomicrmw/cmpxchg width whose
19102 // result is then sign extended to XLEN. With +A, the minimum width is
19103       // result is then sign-extended to XLEN. With +A, the minimum width is 32.
19104 assert(Subtarget.getXLen() == 64);
19106 assert(Subtarget.hasStdExtA());
19107 return 33;
19108 }
19109 break;
19110 }
19111 }
19112
19113 return 1;
19114}
19115
19116 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
19117     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
19118 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
19119
19120 // TODO: Add more target nodes.
19121 switch (Op.getOpcode()) {
19122   case RISCVISD::SELECT_CC:
19123     // Integer select_cc cannot create poison.
19124 // TODO: What are the FP poison semantics?
19125 // TODO: This instruction blocks poison from the unselected operand, can
19126 // we do anything with that?
19127 return !Op.getValueType().isInteger();
19128 }
19129   return TargetLowering::canCreateUndefOrPoisonForTargetNode(
19130       Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
19131}
19132
19133const Constant *
19134 RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
19135   assert(Ld && "Unexpected null LoadSDNode");
19136 if (!ISD::isNormalLoad(Ld))
19137 return nullptr;
19138
19139 SDValue Ptr = Ld->getBasePtr();
19140
19141 // Only constant pools with no offset are supported.
19142 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
19143 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
19144 if (!CNode || CNode->isMachineConstantPoolEntry() ||
19145 CNode->getOffset() != 0)
19146 return nullptr;
19147
19148 return CNode;
19149 };
19150
19151 // Simple case, LLA.
19152 if (Ptr.getOpcode() == RISCVISD::LLA) {
19153 auto *CNode = GetSupportedConstantPool(Ptr);
19154 if (!CNode || CNode->getTargetFlags() != 0)
19155 return nullptr;
19156
19157 return CNode->getConstVal();
19158 }
19159
19160 // Look for a HI and ADD_LO pair.
19161 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
19162 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
19163 return nullptr;
19164
19165 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
19166 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
19167
19168 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
19169 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
19170 return nullptr;
19171
19172 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
19173 return nullptr;
19174
19175 return CNodeLo->getConstVal();
19176}
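// Illustrative sketch of the two addressing shapes recognised above (labels
// and registers are placeholders):
//   lla  a0, .LCPI0_0             # RISCVISD::LLA of the constant pool entry
//   fld  fa0, 0(a0)
// or the %hi/%lo pair
//   lui  a0, %hi(.LCPI0_0)        # RISCVISD::HI
//   addi a0, a0, %lo(.LCPI0_0)    # RISCVISD::ADD_LO
//   fld  fa0, 0(a0)
// In both cases the underlying Constant is handed back so generic combines can
// fold through the load.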
19177
19178 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
19179                                                     MachineBasicBlock *BB) {
19180 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
19181
19182 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
19183 // Should the count have wrapped while it was being read, we need to try
19184 // again.
19185 // For example:
19186 // ```
19187 // read:
19188 // csrrs x3, counterh # load high word of counter
19189 // csrrs x2, counter # load low word of counter
19190 // csrrs x4, counterh # load high word of counter
19191 // bne x3, x4, read # check if high word reads match, otherwise try again
19192 // ```
19193
19194 MachineFunction &MF = *BB->getParent();
19195 const BasicBlock *LLVMBB = BB->getBasicBlock();
19196   MachineFunction::iterator It = ++BB->getIterator();
19197
19198 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
19199 MF.insert(It, LoopMBB);
19200
19201 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
19202 MF.insert(It, DoneMBB);
19203
19204 // Transfer the remainder of BB and its successor edges to DoneMBB.
19205 DoneMBB->splice(DoneMBB->begin(), BB,
19206 std::next(MachineBasicBlock::iterator(MI)), BB->end());
19207   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
19208
19209 BB->addSuccessor(LoopMBB);
19210
19212 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19213 Register LoReg = MI.getOperand(0).getReg();
19214 Register HiReg = MI.getOperand(1).getReg();
19215 int64_t LoCounter = MI.getOperand(2).getImm();
19216 int64_t HiCounter = MI.getOperand(3).getImm();
19217 DebugLoc DL = MI.getDebugLoc();
19218
19219   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
19220 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
19221 .addImm(HiCounter)
19222 .addReg(RISCV::X0);
19223 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
19224 .addImm(LoCounter)
19225 .addReg(RISCV::X0);
19226 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
19227 .addImm(HiCounter)
19228 .addReg(RISCV::X0);
19229
19230 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
19231 .addReg(HiReg)
19232 .addReg(ReadAgainReg)
19233 .addMBB(LoopMBB);
19234
19235 LoopMBB->addSuccessor(LoopMBB);
19236 LoopMBB->addSuccessor(DoneMBB);
19237
19238 MI.eraseFromParent();
19239
19240 return DoneMBB;
19241}
19242
19243 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
19244                                              MachineBasicBlock *BB,
19245                                              const RISCVSubtarget &Subtarget) {
19246 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
19247
19248 MachineFunction &MF = *BB->getParent();
19249 DebugLoc DL = MI.getDebugLoc();
19252 Register LoReg = MI.getOperand(0).getReg();
19253 Register HiReg = MI.getOperand(1).getReg();
19254 Register SrcReg = MI.getOperand(2).getReg();
19255
19256 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
19257 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19258
19259 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
19260 RI, Register());
19262 MachineMemOperand *MMOLo =
19266 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
19267 .addFrameIndex(FI)
19268 .addImm(0)
19269 .addMemOperand(MMOLo);
19270 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
19271 .addFrameIndex(FI)
19272 .addImm(4)
19273 .addMemOperand(MMOHi);
19274 MI.eraseFromParent(); // The pseudo instruction is gone now.
19275 return BB;
19276}
19277
19278 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
19279                                                  MachineBasicBlock *BB,
19280                                                  const RISCVSubtarget &Subtarget) {
19281 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
19282 "Unexpected instruction");
19283
19284 MachineFunction &MF = *BB->getParent();
19285 DebugLoc DL = MI.getDebugLoc();
19288 Register DstReg = MI.getOperand(0).getReg();
19289 Register LoReg = MI.getOperand(1).getReg();
19290 Register HiReg = MI.getOperand(2).getReg();
19291
19292 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
19293 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19294
19296 MachineMemOperand *MMOLo =
19300 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19301 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
19302 .addFrameIndex(FI)
19303 .addImm(0)
19304 .addMemOperand(MMOLo);
19305 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19306 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
19307 .addFrameIndex(FI)
19308 .addImm(4)
19309 .addMemOperand(MMOHi);
19310 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
19311 MI.eraseFromParent(); // The pseudo instruction is gone now.
19312 return BB;
19313}
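// Rough shape of the expansion above (a sketch; registers and offsets are
// illustrative): the two GPR halves are stored to the shared MoveF64 frame
// slot and reloaded as one f64,
//   sw  a0, 0(slot)    # low half
//   sw  a1, 4(slot)    # high half
//   fld fa0, 0(slot)
// since RV32D (Zfa's pair moves aside) has no direct GPR-pair-to-FPR move;
// SplitF64Pseudo above performs the mirror-image store/load sequence.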
19314
19315 static bool isSelectPseudo(MachineInstr &MI) {
19316   switch (MI.getOpcode()) {
19317 default:
19318 return false;
19319 case RISCV::Select_GPR_Using_CC_GPR:
19320 case RISCV::Select_GPR_Using_CC_Imm:
19321 case RISCV::Select_FPR16_Using_CC_GPR:
19322 case RISCV::Select_FPR16INX_Using_CC_GPR:
19323 case RISCV::Select_FPR32_Using_CC_GPR:
19324 case RISCV::Select_FPR32INX_Using_CC_GPR:
19325 case RISCV::Select_FPR64_Using_CC_GPR:
19326 case RISCV::Select_FPR64INX_Using_CC_GPR:
19327 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19328 return true;
19329 }
19330}
19331
19332 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
19333                                         unsigned RelOpcode, unsigned EqOpcode,
19334 const RISCVSubtarget &Subtarget) {
19335 DebugLoc DL = MI.getDebugLoc();
19336 Register DstReg = MI.getOperand(0).getReg();
19337 Register Src1Reg = MI.getOperand(1).getReg();
19338 Register Src2Reg = MI.getOperand(2).getReg();
19340 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19342
19343 // Save the current FFLAGS.
19344 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
19345
19346 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
19347 .addReg(Src1Reg)
19348 .addReg(Src2Reg);
19351
19352 // Restore the FFLAGS.
19353 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19354 .addReg(SavedFFlags, RegState::Kill);
19355
19356 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
19357 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
19358 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
19359 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
19362
19363 // Erase the pseudoinstruction.
19364 MI.eraseFromParent();
19365 return BB;
19366}
19367
19368static MachineBasicBlock *
19370 MachineBasicBlock *ThisMBB,
19371 const RISCVSubtarget &Subtarget) {
19372 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5)
19373 // Without this, custom-inserter would have generated:
19374 //
19375 // A
19376 // | \
19377 // | B
19378 // | /
19379 // C
19380 // | \
19381 // | D
19382 // | /
19383 // E
19384 //
19385 // A: X = ...; Y = ...
19386 // B: empty
19387 // C: Z = PHI [X, A], [Y, B]
19388 // D: empty
19389 // E: PHI [X, C], [Z, D]
19390 //
19391 // If we lower both Select_FPRX_ in a single step, we can instead generate:
19392 //
19393 // A
19394 // | \
19395 // | C
19396 // | /|
19397 // |/ |
19398 // | |
19399 // | D
19400 // | /
19401 // E
19402 //
19403 // A: X = ...; Y = ...
19404 // D: empty
19405 // E: PHI [X, A], [X, C], [Y, D]
19406
19407 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19408 const DebugLoc &DL = First.getDebugLoc();
19409 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
19410 MachineFunction *F = ThisMBB->getParent();
19411 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
19412 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
19413 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
19414 MachineFunction::iterator It = ++ThisMBB->getIterator();
19415 F->insert(It, FirstMBB);
19416 F->insert(It, SecondMBB);
19417 F->insert(It, SinkMBB);
19418
19419 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
19420 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
19422 ThisMBB->end());
19423 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
19424
19425 // Fallthrough block for ThisMBB.
19426 ThisMBB->addSuccessor(FirstMBB);
19427 // Fallthrough block for FirstMBB.
19428 FirstMBB->addSuccessor(SecondMBB);
19429 ThisMBB->addSuccessor(SinkMBB);
19430 FirstMBB->addSuccessor(SinkMBB);
19431 // This is fallthrough.
19432 SecondMBB->addSuccessor(SinkMBB);
19433
19434 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
19435 Register FLHS = First.getOperand(1).getReg();
19436 Register FRHS = First.getOperand(2).getReg();
19437 // Insert appropriate branch.
19438 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
19439 .addReg(FLHS)
19440 .addReg(FRHS)
19441 .addMBB(SinkMBB);
19442
19443 Register SLHS = Second.getOperand(1).getReg();
19444 Register SRHS = Second.getOperand(2).getReg();
19445 Register Op1Reg4 = First.getOperand(4).getReg();
19446 Register Op1Reg5 = First.getOperand(5).getReg();
19447
19448 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
19449 // Insert appropriate branch.
19450 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
19451 .addReg(SLHS)
19452 .addReg(SRHS)
19453 .addMBB(SinkMBB);
19454
19455 Register DestReg = Second.getOperand(0).getReg();
19456 Register Op2Reg4 = Second.getOperand(4).getReg();
19457 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
19458 .addReg(Op2Reg4)
19459 .addMBB(ThisMBB)
19460 .addReg(Op1Reg4)
19461 .addMBB(FirstMBB)
19462 .addReg(Op1Reg5)
19463 .addMBB(SecondMBB);
19464
19465 // Now remove the Select_FPRX_s.
19466 First.eraseFromParent();
19467 Second.eraseFromParent();
19468 return SinkMBB;
19469}
19470
19471 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
19472                                            MachineBasicBlock *BB,
19473                                            const RISCVSubtarget &Subtarget) {
19474 // To "insert" Select_* instructions, we actually have to insert the triangle
19475 // control-flow pattern. The incoming instructions know the destination vreg
19476 // to set, the condition code register to branch on, the true/false values to
19477 // select between, and the condcode to use to select the appropriate branch.
19478 //
19479 // We produce the following control flow:
19480 // HeadMBB
19481 // | \
19482 // | IfFalseMBB
19483 // | /
19484 // TailMBB
19485 //
19486 // When we find a sequence of selects we attempt to optimize their emission
19487 // by sharing the control flow. Currently we only handle cases where we have
19488 // multiple selects with the exact same condition (same LHS, RHS and CC).
19489 // The selects may be interleaved with other instructions if the other
19490 // instructions meet some requirements we deem safe:
19491   // - They are not pseudo instructions.
19492   // - They are debug instructions, or otherwise:
19493   // - They do not have side-effects, do not access memory, and their inputs do
19494   //   not depend on the results of the select pseudo-instructions.
19495 // The TrueV/FalseV operands of the selects cannot depend on the result of
19496 // previous selects in the sequence.
19497 // These conditions could be further relaxed. See the X86 target for a
19498 // related approach and more information.
19499 //
19500 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19501 // is checked here and handled by a separate function -
19502 // EmitLoweredCascadedSelect.
19503
19504 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
19505 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
19506 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
19507 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
19508 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
19509 Next->getOperand(5).isKill())
19510 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
19511
19512 Register LHS = MI.getOperand(1).getReg();
19513 Register RHS;
19514 if (MI.getOperand(2).isReg())
19515 RHS = MI.getOperand(2).getReg();
19516 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
19517
19518 SmallVector<MachineInstr *, 4> SelectDebugValues;
19519 SmallSet<Register, 4> SelectDests;
19520 SelectDests.insert(MI.getOperand(0).getReg());
19521
19522 MachineInstr *LastSelectPseudo = &MI;
19523 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
19524 SequenceMBBI != E; ++SequenceMBBI) {
19525 if (SequenceMBBI->isDebugInstr())
19526 continue;
19527 if (isSelectPseudo(*SequenceMBBI)) {
19528 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
19529 !SequenceMBBI->getOperand(2).isReg() ||
19530 SequenceMBBI->getOperand(2).getReg() != RHS ||
19531 SequenceMBBI->getOperand(3).getImm() != CC ||
19532 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
19533 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
19534 break;
19535 LastSelectPseudo = &*SequenceMBBI;
19536 SequenceMBBI->collectDebugValues(SelectDebugValues);
19537 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
19538 continue;
19539 }
19540 if (SequenceMBBI->hasUnmodeledSideEffects() ||
19541 SequenceMBBI->mayLoadOrStore() ||
19542 SequenceMBBI->usesCustomInsertionHook())
19543 break;
19544 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
19545 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
19546 }))
19547 break;
19548 }
19549
19550 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19551 const BasicBlock *LLVM_BB = BB->getBasicBlock();
19552 DebugLoc DL = MI.getDebugLoc();
19554
19555 MachineBasicBlock *HeadMBB = BB;
19556 MachineFunction *F = BB->getParent();
19557 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
19558 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
19559
19560 F->insert(I, IfFalseMBB);
19561 F->insert(I, TailMBB);
19562
19563 // Set the call frame size on entry to the new basic blocks.
19564 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
19565 IfFalseMBB->setCallFrameSize(CallFrameSize);
19566 TailMBB->setCallFrameSize(CallFrameSize);
19567
19568 // Transfer debug instructions associated with the selects to TailMBB.
19569 for (MachineInstr *DebugInstr : SelectDebugValues) {
19570 TailMBB->push_back(DebugInstr->removeFromParent());
19571 }
19572
19573 // Move all instructions after the sequence to TailMBB.
19574 TailMBB->splice(TailMBB->end(), HeadMBB,
19575 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
19576 // Update machine-CFG edges by transferring all successors of the current
19577 // block to the new block which will contain the Phi nodes for the selects.
19578 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
19579 // Set the successors for HeadMBB.
19580 HeadMBB->addSuccessor(IfFalseMBB);
19581 HeadMBB->addSuccessor(TailMBB);
19582
19583 // Insert appropriate branch.
19584 if (MI.getOperand(2).isImm())
19585 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
19586 .addReg(LHS)
19587 .addImm(MI.getOperand(2).getImm())
19588 .addMBB(TailMBB);
19589 else
19590 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
19591 .addReg(LHS)
19592 .addReg(RHS)
19593 .addMBB(TailMBB);
19594
19595 // IfFalseMBB just falls through to TailMBB.
19596 IfFalseMBB->addSuccessor(TailMBB);
19597
19598 // Create PHIs for all of the select pseudo-instructions.
19599 auto SelectMBBI = MI.getIterator();
19600 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
19601 auto InsertionPoint = TailMBB->begin();
19602 while (SelectMBBI != SelectEnd) {
19603 auto Next = std::next(SelectMBBI);
19604 if (isSelectPseudo(*SelectMBBI)) {
19605 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
19606 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
19607 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
19608 .addReg(SelectMBBI->getOperand(4).getReg())
19609 .addMBB(HeadMBB)
19610 .addReg(SelectMBBI->getOperand(5).getReg())
19611 .addMBB(IfFalseMBB);
19612 SelectMBBI->eraseFromParent();
19613 }
19614 SelectMBBI = Next;
19615 }
19616
19617 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
19618 return TailMBB;
19619}
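// Illustrative result (sketch) for a single select with CC = EQ:
//   HeadMBB:    beq  lhs, rhs, TailMBB
//   IfFalseMBB: (empty, falls through)
//   TailMBB:    %res = PHI [ %TrueV, HeadMBB ], [ %FalseV, IfFalseMBB ]
// Additional selects that share the same LHS/RHS/CC reuse the branch and just
// contribute extra PHIs in TailMBB.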
19620
19621// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
19622static const RISCV::RISCVMaskedPseudoInfo *
19623lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
19625 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
19626 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
19628 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
19629 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
19630 return Masked;
19631}
19632
19633 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
19634                                                     MachineBasicBlock *BB,
19635                                                     unsigned CVTXOpc) {
19636 DebugLoc DL = MI.getDebugLoc();
19637
19639
19641 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19642
19643 // Save the old value of FFLAGS.
19644 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
19645
19646 assert(MI.getNumOperands() == 7);
19647
19648 // Emit a VFCVT_X_F
19649 const TargetRegisterInfo *TRI =
19651 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
19652 Register Tmp = MRI.createVirtualRegister(RC);
19653 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
19654 .add(MI.getOperand(1))
19655 .add(MI.getOperand(2))
19656 .add(MI.getOperand(3))
19657 .add(MachineOperand::CreateImm(7)) // frm = DYN
19658 .add(MI.getOperand(4))
19659 .add(MI.getOperand(5))
19660 .add(MI.getOperand(6))
19661 .add(MachineOperand::CreateReg(RISCV::FRM,
19662 /*IsDef*/ false,
19663 /*IsImp*/ true));
19664
19665 // Emit a VFCVT_F_X
19666 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
19667 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
19668 // There is no E8 variant for VFCVT_F_X.
19669 assert(Log2SEW >= 4);
19670 unsigned CVTFOpc =
19671 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
19672 ->MaskedPseudo;
19673
19674 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
19675 .add(MI.getOperand(0))
19676 .add(MI.getOperand(1))
19677 .addReg(Tmp)
19678 .add(MI.getOperand(3))
19679 .add(MachineOperand::CreateImm(7)) // frm = DYN
19680 .add(MI.getOperand(4))
19681 .add(MI.getOperand(5))
19682 .add(MI.getOperand(6))
19683 .add(MachineOperand::CreateReg(RISCV::FRM,
19684 /*IsDef*/ false,
19685 /*IsImp*/ true));
19686
19687 // Restore FFLAGS.
19688 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19689 .addReg(SavedFFLAGS, RegState::Kill);
19690
19691 // Erase the pseudoinstruction.
19692 MI.eraseFromParent();
19693 return BB;
19694}
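// Illustrative expansion (sketch, register choices arbitrary) of the pseudo
// handled above:
//   csrr        t0, fflags       # ReadFFLAGS
//   vfcvt.x.f.v v8, v9, v0.t     # masked, frm = DYN
//   vfcvt.f.x.v v8, v8, v0.t     # masked, frm = DYN
//   csrw        fflags, t0       # WriteFFLAGS
// i.e. the value is rounded through the integer domain under the dynamic
// rounding mode while any inexact flag raised by the conversions is discarded.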
19695
19696 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
19697                                      const RISCVSubtarget &Subtarget) {
19698 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
19699 const TargetRegisterClass *RC;
19700 switch (MI.getOpcode()) {
19701 default:
19702 llvm_unreachable("Unexpected opcode");
19703 case RISCV::PseudoFROUND_H:
19704 CmpOpc = RISCV::FLT_H;
19705 F2IOpc = RISCV::FCVT_W_H;
19706 I2FOpc = RISCV::FCVT_H_W;
19707 FSGNJOpc = RISCV::FSGNJ_H;
19708 FSGNJXOpc = RISCV::FSGNJX_H;
19709 RC = &RISCV::FPR16RegClass;
19710 break;
19711 case RISCV::PseudoFROUND_H_INX:
19712 CmpOpc = RISCV::FLT_H_INX;
19713 F2IOpc = RISCV::FCVT_W_H_INX;
19714 I2FOpc = RISCV::FCVT_H_W_INX;
19715 FSGNJOpc = RISCV::FSGNJ_H_INX;
19716 FSGNJXOpc = RISCV::FSGNJX_H_INX;
19717 RC = &RISCV::GPRF16RegClass;
19718 break;
19719 case RISCV::PseudoFROUND_S:
19720 CmpOpc = RISCV::FLT_S;
19721 F2IOpc = RISCV::FCVT_W_S;
19722 I2FOpc = RISCV::FCVT_S_W;
19723 FSGNJOpc = RISCV::FSGNJ_S;
19724 FSGNJXOpc = RISCV::FSGNJX_S;
19725 RC = &RISCV::FPR32RegClass;
19726 break;
19727 case RISCV::PseudoFROUND_S_INX:
19728 CmpOpc = RISCV::FLT_S_INX;
19729 F2IOpc = RISCV::FCVT_W_S_INX;
19730 I2FOpc = RISCV::FCVT_S_W_INX;
19731 FSGNJOpc = RISCV::FSGNJ_S_INX;
19732 FSGNJXOpc = RISCV::FSGNJX_S_INX;
19733 RC = &RISCV::GPRF32RegClass;
19734 break;
19735 case RISCV::PseudoFROUND_D:
19736 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19737 CmpOpc = RISCV::FLT_D;
19738 F2IOpc = RISCV::FCVT_L_D;
19739 I2FOpc = RISCV::FCVT_D_L;
19740 FSGNJOpc = RISCV::FSGNJ_D;
19741 FSGNJXOpc = RISCV::FSGNJX_D;
19742 RC = &RISCV::FPR64RegClass;
19743 break;
19744 case RISCV::PseudoFROUND_D_INX:
19745 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19746 CmpOpc = RISCV::FLT_D_INX;
19747 F2IOpc = RISCV::FCVT_L_D_INX;
19748 I2FOpc = RISCV::FCVT_D_L_INX;
19749 FSGNJOpc = RISCV::FSGNJ_D_INX;
19750 FSGNJXOpc = RISCV::FSGNJX_D_INX;
19751 RC = &RISCV::GPRRegClass;
19752 break;
19753 }
19754
19755 const BasicBlock *BB = MBB->getBasicBlock();
19756 DebugLoc DL = MI.getDebugLoc();
19758
19760 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
19761 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
19762
19763 F->insert(I, CvtMBB);
19764 F->insert(I, DoneMBB);
19765 // Move all instructions after the sequence to DoneMBB.
19766 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
19767 MBB->end());
19768 // Update machine-CFG edges by transferring all successors of the current
19769 // block to the new block which will contain the Phi nodes for the selects.
19771 // Set the successors for MBB.
19772 MBB->addSuccessor(CvtMBB);
19773 MBB->addSuccessor(DoneMBB);
19774
19775 Register DstReg = MI.getOperand(0).getReg();
19776 Register SrcReg = MI.getOperand(1).getReg();
19777 Register MaxReg = MI.getOperand(2).getReg();
19778 int64_t FRM = MI.getOperand(3).getImm();
19779
19780 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19782
19783 Register FabsReg = MRI.createVirtualRegister(RC);
19784 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
19785
19786 // Compare the FP value to the max value.
19787 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19788 auto MIB =
19789 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
19792
19793 // Insert branch.
19794 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
19795 .addReg(CmpReg)
19796 .addReg(RISCV::X0)
19797 .addMBB(DoneMBB);
19798
19799 CvtMBB->addSuccessor(DoneMBB);
19800
19801 // Convert to integer.
19802 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19803 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
19806
19807 // Convert back to FP.
19808 Register I2FReg = MRI.createVirtualRegister(RC);
19809 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
19812
19813 // Restore the sign bit.
19814 Register CvtReg = MRI.createVirtualRegister(RC);
19815 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
19816
19817 // Merge the results.
19818 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
19819 .addReg(SrcReg)
19820 .addMBB(MBB)
19821 .addReg(CvtReg)
19822 .addMBB(CvtMBB);
19823
19824 MI.eraseFromParent();
19825 return DoneMBB;
19826}
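// Illustrative expansion (sketch) for PseudoFROUND_S, where MaxReg holds the
// smallest magnitude that is guaranteed integral (2^23 for f32); registers are
// placeholders:
//   fsgnjx.s ft0, fa0, fa0       # fabs
//   flt.s    t0, ft0, fa1        # |x| < max ?
//   beqz     t0, done            # already integral (or NaN): pass through
//   fcvt.w.s t1, fa0, <frm>
//   fcvt.s.w ft1, t1, <frm>
//   fsgnj.s  fa0, ft1, fa0       # reapply the original sign
// done: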
19827
19828 MachineBasicBlock *
19829 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
19830                                                  MachineBasicBlock *BB) const {
19831 switch (MI.getOpcode()) {
19832 default:
19833 llvm_unreachable("Unexpected instr type to insert");
19834 case RISCV::ReadCounterWide:
19835 assert(!Subtarget.is64Bit() &&
19836 "ReadCounterWide is only to be used on riscv32");
19837 return emitReadCounterWidePseudo(MI, BB);
19838 case RISCV::Select_GPR_Using_CC_GPR:
19839 case RISCV::Select_GPR_Using_CC_Imm:
19840 case RISCV::Select_FPR16_Using_CC_GPR:
19841 case RISCV::Select_FPR16INX_Using_CC_GPR:
19842 case RISCV::Select_FPR32_Using_CC_GPR:
19843 case RISCV::Select_FPR32INX_Using_CC_GPR:
19844 case RISCV::Select_FPR64_Using_CC_GPR:
19845 case RISCV::Select_FPR64INX_Using_CC_GPR:
19846 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19847 return emitSelectPseudo(MI, BB, Subtarget);
19848 case RISCV::BuildPairF64Pseudo:
19849 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
19850 case RISCV::SplitF64Pseudo:
19851 return emitSplitF64Pseudo(MI, BB, Subtarget);
19852 case RISCV::PseudoQuietFLE_H:
19853 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
19854 case RISCV::PseudoQuietFLE_H_INX:
19855 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
19856 case RISCV::PseudoQuietFLT_H:
19857 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
19858 case RISCV::PseudoQuietFLT_H_INX:
19859 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
19860 case RISCV::PseudoQuietFLE_S:
19861 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
19862 case RISCV::PseudoQuietFLE_S_INX:
19863 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
19864 case RISCV::PseudoQuietFLT_S:
19865 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
19866 case RISCV::PseudoQuietFLT_S_INX:
19867 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
19868 case RISCV::PseudoQuietFLE_D:
19869 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
19870 case RISCV::PseudoQuietFLE_D_INX:
19871 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
19872 case RISCV::PseudoQuietFLE_D_IN32X:
19873 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
19874 Subtarget);
19875 case RISCV::PseudoQuietFLT_D:
19876 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
19877 case RISCV::PseudoQuietFLT_D_INX:
19878 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
19879 case RISCV::PseudoQuietFLT_D_IN32X:
19880 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
19881 Subtarget);
19882
19883 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
19884 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
19885 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
19886 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
19887 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
19888 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
19889 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
19890 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
19891 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
19892 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
19893 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
19894 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
19895 case RISCV::PseudoFROUND_H:
19896 case RISCV::PseudoFROUND_H_INX:
19897 case RISCV::PseudoFROUND_S:
19898 case RISCV::PseudoFROUND_S_INX:
19899 case RISCV::PseudoFROUND_D:
19900 case RISCV::PseudoFROUND_D_INX:
19901 case RISCV::PseudoFROUND_D_IN32X:
19902 return emitFROUND(MI, BB, Subtarget);
19903 case RISCV::PROBED_STACKALLOC_DYN:
19904 return emitDynamicProbedAlloc(MI, BB);
19905 case TargetOpcode::STATEPOINT:
19906     // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
19907     // while the jal call instruction (which the statepoint is lowered to at
19908     // the end) has an implicit def. This def is early-clobber as it is set at
19909     // the moment of the call, earlier than any use is read.
19910     // Add this implicit dead def here as a workaround.
19911 MI.addOperand(*MI.getMF(),
19913 RISCV::X1, /*isDef*/ true,
19914 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
19915 /*isUndef*/ false, /*isEarlyClobber*/ true));
19916 [[fallthrough]];
19917 case TargetOpcode::STACKMAP:
19918 case TargetOpcode::PATCHPOINT:
19919 if (!Subtarget.is64Bit())
19920 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
19921 "supported on 64-bit targets");
19922 return emitPatchPoint(MI, BB);
19923 }
19924}
19925
19926 void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
19927                                                         SDNode *Node) const {
19928 // Add FRM dependency to any instructions with dynamic rounding mode.
19929 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
19930 if (Idx < 0) {
19931 // Vector pseudos have FRM index indicated by TSFlags.
19932 Idx = RISCVII::getFRMOpNum(MI.getDesc());
19933 if (Idx < 0)
19934 return;
19935 }
19936 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
19937 return;
19938 // If the instruction already reads FRM, don't add another read.
19939 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
19940 return;
19941 MI.addOperand(
19942 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
19943}
19944
19945void RISCVTargetLowering::analyzeInputArgs(
19946 MachineFunction &MF, CCState &CCInfo,
19947 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
19948 RISCVCCAssignFn Fn) const {
19949 unsigned NumArgs = Ins.size();
19951
19952 for (unsigned i = 0; i != NumArgs; ++i) {
19953 MVT ArgVT = Ins[i].VT;
19954 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19955
19956 Type *ArgTy = nullptr;
19957 if (IsRet)
19958 ArgTy = FType->getReturnType();
19959 else if (Ins[i].isOrigArg())
19960 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19961
19962 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19963 /*IsFixed=*/true, IsRet, ArgTy)) {
19964 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
19965 << ArgVT << '\n');
19966 llvm_unreachable(nullptr);
19967 }
19968 }
19969}
19970
19971void RISCVTargetLowering::analyzeOutputArgs(
19972 MachineFunction &MF, CCState &CCInfo,
19973 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19974 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19975 unsigned NumArgs = Outs.size();
19976
19977 for (unsigned i = 0; i != NumArgs; i++) {
19978 MVT ArgVT = Outs[i].VT;
19979 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19980 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19981
19982 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19983 Outs[i].IsFixed, IsRet, OrigTy)) {
19984 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19985 << ArgVT << "\n");
19986 llvm_unreachable(nullptr);
19987 }
19988 }
19989}
19990
19991// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19992// values.
19993 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
19994                                    const CCValAssign &VA, const SDLoc &DL,
19995 const RISCVSubtarget &Subtarget) {
19996 if (VA.needsCustom()) {
19997 if (VA.getLocVT().isInteger() &&
19998 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19999 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
20000 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
20001 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
20002     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
20003       return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
20004 llvm_unreachable("Unexpected Custom handling.");
20005 }
20006
20007 switch (VA.getLocInfo()) {
20008 default:
20009 llvm_unreachable("Unexpected CCValAssign::LocInfo");
20010 case CCValAssign::Full:
20011 break;
20012 case CCValAssign::BCvt:
20013 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
20014 break;
20015 }
20016 return Val;
20017}
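// For example (illustrative), an f16 argument passed in a GPR under an integer
// calling convention arrives with LocVT == XLenVT and ValVT == f16; the
// needsCustom() path above moves it back with RISCVISD::FMV_H_X, while a
// fixed-length vector promoted to a scalable container is converted back with
// convertFromScalableVector.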
20018
20019// The caller is responsible for loading the full value if the argument is
20020// passed with CCValAssign::Indirect.
20021 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
20022                                 const CCValAssign &VA, const SDLoc &DL,
20023 const ISD::InputArg &In,
20024 const RISCVTargetLowering &TLI) {
20027 EVT LocVT = VA.getLocVT();
20028 SDValue Val;
20029 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
20030 Register VReg = RegInfo.createVirtualRegister(RC);
20031 RegInfo.addLiveIn(VA.getLocReg(), VReg);
20032 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
20033
20034 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
20035 if (In.isOrigArg()) {
20036 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
20037 if (OrigArg->getType()->isIntegerTy()) {
20038 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
20039 // An input zero extended from i31 can also be considered sign extended.
20040 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
20041 (BitWidth < 32 && In.Flags.isZExt())) {
20043 RVFI->addSExt32Register(VReg);
20044 }
20045 }
20046 }
20047
20049 return Val;
20050
20051 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
20052}
20053
20054 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
20055                                    const CCValAssign &VA, const SDLoc &DL,
20056 const RISCVSubtarget &Subtarget) {
20057 EVT LocVT = VA.getLocVT();
20058
20059 if (VA.needsCustom()) {
20060 if (LocVT.isInteger() &&
20061 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
20062 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
20063 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
20064 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
20065 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
20066 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
20067 llvm_unreachable("Unexpected Custom handling.");
20068 }
20069
20070 switch (VA.getLocInfo()) {
20071 default:
20072 llvm_unreachable("Unexpected CCValAssign::LocInfo");
20073 case CCValAssign::Full:
20074 break;
20075 case CCValAssign::BCvt:
20076 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
20077 break;
20078 }
20079 return Val;
20080}
20081
20082// The caller is responsible for loading the full value if the argument is
20083// passed with CCValAssign::Indirect.
20084 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
20085                                 const CCValAssign &VA, const SDLoc &DL) {
20087 MachineFrameInfo &MFI = MF.getFrameInfo();
20088 EVT LocVT = VA.getLocVT();
20089 EVT ValVT = VA.getValVT();
20091 if (VA.getLocInfo() == CCValAssign::Indirect) {
20092     // When the value is a scalable vector, what is stored on the stack is a
20093     // pointer to the scalable vector value. The ValVT will therefore be the
20094     // pointer type, not the scalable vector type.
20095 ValVT = LocVT;
20096 }
20097 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
20098 /*IsImmutable=*/true);
20099 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20100 SDValue Val;
20101
20103 switch (VA.getLocInfo()) {
20104 default:
20105 llvm_unreachable("Unexpected CCValAssign::LocInfo");
20106 case CCValAssign::Full:
20108 case CCValAssign::BCvt:
20109 break;
20110 }
20111 Val = DAG.getExtLoad(
20112 ExtType, DL, LocVT, Chain, FIN,
20114 return Val;
20115}
20116
20117 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
20118                                        const CCValAssign &VA,
20119 const CCValAssign &HiVA,
20120 const SDLoc &DL) {
20121 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
20122 "Unexpected VA");
20124 MachineFrameInfo &MFI = MF.getFrameInfo();
20126
20127 assert(VA.isRegLoc() && "Expected register VA assignment");
20128
20129 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20130 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
20131 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
20132 SDValue Hi;
20133 if (HiVA.isMemLoc()) {
20134 // Second half of f64 is passed on the stack.
20135 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
20136 /*IsImmutable=*/true);
20137 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
20138 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
20140 } else {
20141 // Second half of f64 is passed in another GPR.
20142 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20143 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
20144 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
20145 }
20146 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
20147}
20148
20149// Transform physical registers into virtual registers.
20150 SDValue RISCVTargetLowering::LowerFormalArguments(
20151     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
20152 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
20153 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
20154
20156
20157 switch (CallConv) {
20158 default:
20159 report_fatal_error("Unsupported calling convention");
20160 case CallingConv::C:
20161 case CallingConv::Fast:
20163 case CallingConv::GRAAL:
20165 break;
20166 case CallingConv::GHC:
20167 if (Subtarget.hasStdExtE())
20168 report_fatal_error("GHC calling convention is not supported on RVE!");
20169 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
20170 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
20171 "(Zdinx/D) instruction set extensions");
20172 }
20173
20174 const Function &Func = MF.getFunction();
20175 if (Func.hasFnAttribute("interrupt")) {
20176 if (!Func.arg_empty())
20178 "Functions with the interrupt attribute cannot have arguments!");
20179
20180 StringRef Kind =
20181 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20182
20183 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
20185 "Function interrupt attribute argument not supported!");
20186 }
20187
20188 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20189 MVT XLenVT = Subtarget.getXLenVT();
20190 unsigned XLenInBytes = Subtarget.getXLen() / 8;
20191   // Used with varargs to accumulate store chains.
20192 std::vector<SDValue> OutChains;
20193
20194 // Assign locations to all of the incoming arguments.
20196 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20197
20198 if (CallConv == CallingConv::GHC)
20200 else
20201 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
20203 : CC_RISCV);
20204
20205 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
20206 CCValAssign &VA = ArgLocs[i];
20207 SDValue ArgValue;
20208 // Passing f64 on RV32D with a soft float ABI must be handled as a special
20209 // case.
20210 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20211 assert(VA.needsCustom());
20212 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
20213 } else if (VA.isRegLoc())
20214 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
20215 else
20216 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
20217
20218 if (VA.getLocInfo() == CCValAssign::Indirect) {
20219 // If the original argument was split and passed by reference (e.g. i128
20220 // on RV32), we need to load all parts of it here (using the same
20221 // address). Vectors may be partly split to registers and partly to the
20222 // stack, in which case the base address is partly offset and subsequent
20223 // stores are relative to that.
20224 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
20226 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
20227 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
20228 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20229 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
20230 CCValAssign &PartVA = ArgLocs[i + 1];
20231 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
20232 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20233 if (PartVA.getValVT().isScalableVector())
20234 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20235 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
20236 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
20237 MachinePointerInfo()));
20238 ++i;
20239 ++InsIdx;
20240 }
20241 continue;
20242 }
20243 InVals.push_back(ArgValue);
20244 }
20245
20246 if (any_of(ArgLocs,
20247 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20248 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20249
20250 if (IsVarArg) {
20251 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
20252 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
20253 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
20254 MachineFrameInfo &MFI = MF.getFrameInfo();
20255 MachineRegisterInfo &RegInfo = MF.getRegInfo();
20256 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
20257
20258 // Size of the vararg save area. For now, the varargs save area is either
20259 // zero or large enough to hold a0-a7.
20260 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
20261 int FI;
20262
20263 // If all registers are allocated, then all varargs must be passed on the
20264 // stack and we don't need to save any argregs.
20265 if (VarArgsSaveSize == 0) {
20266 int VaArgOffset = CCInfo.getStackSize();
20267 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
20268 } else {
20269 int VaArgOffset = -VarArgsSaveSize;
20270 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
20271
20272 // If saving an odd number of registers then create an extra stack slot to
20273 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
20274 // offsets to even-numbered registers remain 2*XLEN-aligned.
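// For example, on ILP32 with five argument registers already used for named
// arguments (Idx == 5), a5-a7 are saved (12 bytes) and one 4-byte pad slot
// is added, so the vararg save area becomes 16 bytes and stays 8-byte aligned.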
20275 if (Idx % 2) {
20276 MFI.CreateFixedObject(
20277 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
20278 VarArgsSaveSize += XLenInBytes;
20279 }
20280
20281 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20282
20283 // Copy the integer registers that may have been used for passing varargs
20284 // to the vararg save area.
20285 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
20286 const Register Reg = RegInfo.createVirtualRegister(RC);
20287 RegInfo.addLiveIn(ArgRegs[I], Reg);
20288 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
20289 SDValue Store = DAG.getStore(
20290 Chain, DL, ArgValue, FIN,
20291 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
20292 OutChains.push_back(Store);
20293 FIN =
20294 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
20295 }
20296 }
20297
20298 // Record the frame index of the first variable argument,
20299 // which is needed by VASTART.
20300 RVFI->setVarArgsFrameIndex(FI);
20301 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
20302 }
20303
20304 // All stores are grouped in one node to allow the matching between
20305 // the size of Ins and InVals. This only happens for vararg functions.
20306 if (!OutChains.empty()) {
20307 OutChains.push_back(Chain);
20308 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
20309 }
20310
20311 return Chain;
20312}
20313
20314/// isEligibleForTailCallOptimization - Check whether the call is eligible
20315/// for tail call optimization.
20316/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
20317bool RISCVTargetLowering::isEligibleForTailCallOptimization(
20318 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
20319 const SmallVector<CCValAssign, 16> &ArgLocs) const {
20320
20321 auto CalleeCC = CLI.CallConv;
20322 auto &Outs = CLI.Outs;
20323 auto &Caller = MF.getFunction();
20324 auto CallerCC = Caller.getCallingConv();
20325
20326 // Exception-handling functions need a special set of instructions to
20327 // indicate a return to the hardware. Tail-calling another function would
20328 // probably break this.
20329 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
20330 // should be expanded as new function attributes are introduced.
20331 if (Caller.hasFnAttribute("interrupt"))
20332 return false;
20333
20334 // Do not tail call opt if the stack is used to pass parameters.
20335 if (CCInfo.getStackSize() != 0)
20336 return false;
20337
20338 // Do not tail call opt if any parameters need to be passed indirectly.
20339 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
20340 // passed indirectly. So the address of the value will be passed in a
20341 // register, or if not available, then the address is put on the stack. In
20342 // order to pass indirectly, space on the stack often needs to be allocated
20343 // to store the value. In that case the CCInfo.getStackSize() != 0 check is
20344 // not enough, and we also need to check whether any of the CCValAssign
20345 // ArgLocs are passed CCValAssign::Indirect.
20346 for (auto &VA : ArgLocs)
20347 if (VA.getLocInfo() == CCValAssign::Indirect)
20348 return false;
20349
20350 // Do not tail call opt if either caller or callee uses struct return
20351 // semantics.
20352 auto IsCallerStructRet = Caller.hasStructRetAttr();
20353 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
20354 if (IsCallerStructRet || IsCalleeStructRet)
20355 return false;
20356
20357 // The callee has to preserve all registers the caller needs to preserve.
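// For example, a caller using the C calling convention would typically not
// be able to tail-call a GHC-convention callee: GHC preserves no registers,
// so its preserved mask is not a superset of the caller's.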
20358 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
20359 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
20360 if (CalleeCC != CallerCC) {
20361 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
20362 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20363 return false;
20364 }
20365
20366 // Byval parameters hand the function a pointer directly into the stack area
20367 // we want to reuse during a tail call. Working around this *is* possible
20368 // but less efficient and uglier in LowerCall.
20369 for (auto &Arg : Outs)
20370 if (Arg.Flags.isByVal())
20371 return false;
20372
20373 return true;
20374}
20375
20376 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
20377 return DAG.getDataLayout().getPrefTypeAlign(
20378 VT.getTypeForEVT(*DAG.getContext()));
20379}
20380
20381// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
20382// and output parameter nodes.
20383 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
20384 SmallVectorImpl<SDValue> &InVals) const {
20385 SelectionDAG &DAG = CLI.DAG;
20386 SDLoc &DL = CLI.DL;
20387 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
20388 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
20389 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
20390 SDValue Chain = CLI.Chain;
20391 SDValue Callee = CLI.Callee;
20392 bool &IsTailCall = CLI.IsTailCall;
20393 CallingConv::ID CallConv = CLI.CallConv;
20394 bool IsVarArg = CLI.IsVarArg;
20395 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20396 MVT XLenVT = Subtarget.getXLenVT();
20397
20398 MachineFunction &MF = DAG.getMachineFunction();
20399
20400 // Analyze the operands of the call, assigning locations to each operand.
20401 SmallVector<CCValAssign, 16> ArgLocs;
20402 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20403
20404 if (CallConv == CallingConv::GHC) {
20405 if (Subtarget.hasStdExtE())
20406 report_fatal_error("GHC calling convention is not supported on RVE!");
20407 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
20408 } else
20409 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
20410 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
20411 : CC_RISCV);
20412
20413 // Check if it's really possible to do a tail call.
20414 if (IsTailCall)
20415 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
20416
20417 if (IsTailCall)
20418 ++NumTailCalls;
20419 else if (CLI.CB && CLI.CB->isMustTailCall())
20420 report_fatal_error("failed to perform tail call elimination on a call "
20421 "site marked musttail");
20422
20423 // Get a count of how many bytes are to be pushed on the stack.
20424 unsigned NumBytes = ArgCCInfo.getStackSize();
20425
20426 // Create local copies for byval args
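// A byval argument is passed by copying the whole aggregate: the caller
// makes a private stack copy here and passes that copy's address, so the
// callee can modify it without affecting the original object.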
20427 SmallVector<SDValue, 8> ByValArgs;
20428 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20429 ISD::ArgFlagsTy Flags = Outs[i].Flags;
20430 if (!Flags.isByVal())
20431 continue;
20432
20433 SDValue Arg = OutVals[i];
20434 unsigned Size = Flags.getByValSize();
20435 Align Alignment = Flags.getNonZeroByValAlign();
20436
20437 int FI =
20438 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
20439 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
20440 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
20441
20442 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
20443 /*IsVolatile=*/false,
20444 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
20445 MachinePointerInfo(), MachinePointerInfo());
20446 ByValArgs.push_back(FIPtr);
20447 }
20448
20449 if (!IsTailCall)
20450 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
20451
20452 // Copy argument values to their designated locations.
20453 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
20454 SmallVector<SDValue, 8> MemOpChains;
20455 SDValue StackPtr;
20456 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
20457 ++i, ++OutIdx) {
20458 CCValAssign &VA = ArgLocs[i];
20459 SDValue ArgValue = OutVals[OutIdx];
20460 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
20461
20462 // Handle passing f64 on RV32D with a soft float ABI as a special case.
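// For example, with the ilp32 ABI an outgoing f64 is split with
// RISCVISD::SplitF64; the low half goes in the assigned GPR and the high
// half goes either in the next GPR or, if none is left, on the stack.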
20463 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20464 assert(VA.isRegLoc() && "Expected register VA assignment");
20465 assert(VA.needsCustom());
20466 SDValue SplitF64 = DAG.getNode(
20467 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
20468 SDValue Lo = SplitF64.getValue(0);
20469 SDValue Hi = SplitF64.getValue(1);
20470
20471 Register RegLo = VA.getLocReg();
20472 RegsToPass.push_back(std::make_pair(RegLo, Lo));
20473
20474 // Get the CCValAssign for the Hi part.
20475 CCValAssign &HiVA = ArgLocs[++i];
20476
20477 if (HiVA.isMemLoc()) {
20478 // Second half of f64 is passed on the stack.
20479 if (!StackPtr.getNode())
20480 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20481 SDValue Address =
20482 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20483 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
20484 // Emit the store.
20485 MemOpChains.push_back(DAG.getStore(
20486 Chain, DL, Hi, Address,
20487 MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
20488 } else {
20489 // Second half of f64 is passed in another GPR.
20490 Register RegHigh = HiVA.getLocReg();
20491 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
20492 }
20493 continue;
20494 }
20495
20496 // Promote the value if needed.
20497 // For now, only handle fully promoted and indirect arguments.
20498 if (VA.getLocInfo() == CCValAssign::Indirect) {
20499 // Store the argument in a stack slot and pass its address.
20500 Align StackAlign =
20501 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20502 getPrefTypeAlign(ArgValue.getValueType(), DAG));
20503 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20504 // If the original argument was split (e.g. i128), we need
20505 // to store the required parts of it here (and pass just one address).
20506 // Vectors may be partly split to registers and partly to the stack, in
20507 // which case the base address is partly offset and subsequent stores are
20508 // relative to that.
20509 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
20510 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
20511 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20512 // Calculate the total size to store. We don't have access to what we're
20513 // actually storing other than performing the loop and collecting the
20514 // info.
20515 SmallVector<std::pair<SDValue, SDValue>> Parts;
20516 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
20517 SDValue PartValue = OutVals[OutIdx + 1];
20518 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
20519 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20520 EVT PartVT = PartValue.getValueType();
20521 if (PartVT.isScalableVector())
20522 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20523 StoredSize += PartVT.getStoreSize();
20524 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
20525 Parts.push_back(std::make_pair(PartValue, Offset));
20526 ++i;
20527 ++OutIdx;
20528 }
20529 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
20530 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
20531 MemOpChains.push_back(
20532 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
20533 MachinePointerInfo::getFixedStack(MF, FI)));
20534 for (const auto &Part : Parts) {
20535 SDValue PartValue = Part.first;
20536 SDValue PartOffset = Part.second;
20537 SDValue Address =
20538 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
20539 MemOpChains.push_back(
20540 DAG.getStore(Chain, DL, PartValue, Address,
20541 MachinePointerInfo::getUnknownStack(MF)));
20542 }
20543 ArgValue = SpillSlot;
20544 } else {
20545 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
20546 }
20547
20548 // Use local copy if it is a byval arg.
20549 if (Flags.isByVal())
20550 ArgValue = ByValArgs[j++];
20551
20552 if (VA.isRegLoc()) {
20553 // Queue up the argument copies and emit them at the end.
20554 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
20555 } else {
20556 assert(VA.isMemLoc() && "Argument not register or memory");
20557 assert(!IsTailCall && "Tail call not allowed if stack is used "
20558 "for passing parameters");
20559
20560 // Work out the address of the stack slot.
20561 if (!StackPtr.getNode())
20562 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20563 SDValue Address =
20564 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20565 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
20566
20567 // Emit the store.
20568 MemOpChains.push_back(
20569 DAG.getStore(Chain, DL, ArgValue, Address,
20570 MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
20571 }
20572 }
20573
20574 // Join the stores, which are independent of one another.
20575 if (!MemOpChains.empty())
20576 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
20577
20578 SDValue Glue;
20579
20580 // Build a sequence of copy-to-reg nodes, chained and glued together.
20581 for (auto &Reg : RegsToPass) {
20582 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
20583 Glue = Chain.getValue(1);
20584 }
20585
20586 // Validate that none of the argument registers have been marked as
20587 // reserved; if so, report an error. Do the same for the return address if
20588 // this is not a tail call.
20589 validateCCReservedRegs(RegsToPass, MF);
20590 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
20591 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20592 MF.getFunction(),
20593 "Return address register required, but has been reserved."});
20594
20595 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20596 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20597 // split it and then direct call can be matched by PseudoCALL.
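// A direct call lowered this way is matched as PseudoCALL, which expands to
// an auipc+jalr pair and may later be relaxed to a single jal by the linker
// when the target is close enough.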
20598 bool CalleeIsLargeExternalSymbol = false;
20599 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
20600 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
20601 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
20602 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20603 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
20604 CalleeIsLargeExternalSymbol = true;
20605 }
20606 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
20607 const GlobalValue *GV = S->getGlobal();
20608 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
20609 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20610 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
20611 }
20612
20613 // The first call operand is the chain and the second is the target address.
20614 SmallVector<SDValue, 8> Ops;
20615 Ops.push_back(Chain);
20616 Ops.push_back(Callee);
20617
20618 // Add argument registers to the end of the list so that they are
20619 // known live into the call.
20620 for (auto &Reg : RegsToPass)
20621 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
20622
20623 // Add a register mask operand representing the call-preserved registers.
20624 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
20625 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
20626 assert(Mask && "Missing call preserved mask for calling convention");
20627 Ops.push_back(DAG.getRegisterMask(Mask));
20628
20629 // Glue the call to the argument copies, if any.
20630 if (Glue.getNode())
20631 Ops.push_back(Glue);
20632
20633 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
20634 "Unexpected CFI type for a direct call");
20635
20636 // Emit the call.
20637 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20638
20639 // Use a software-guarded branch for large code model non-indirect calls.
20640 // A tail call to an external symbol will have a null CLI.CB, so we need
20641 // another way to determine the call-site type.
20642 bool NeedSWGuarded = false;
20643 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
20644 Subtarget.hasStdExtZicfilp() &&
20645 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
20646 NeedSWGuarded = true;
20647
20648 if (IsTailCall) {
20649 MF.getFrameInfo().setHasTailCall();
20650 unsigned CallOpc =
20651 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
20652 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20653 if (CLI.CFIType)
20654 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20655 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
20656 return Ret;
20657 }
20658
20659 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
20660 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20661 if (CLI.CFIType)
20662 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20663 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
20664 Glue = Chain.getValue(1);
20665
20666 // Mark the end of the call, which is glued to the call itself.
20667 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
20668 Glue = Chain.getValue(1);
20669
20670 // Assign locations to each value returned by this call.
20671 SmallVector<CCValAssign, 16> RVLocs;
20672 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20673 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
20674
20675 // Copy all of the result registers out of their specified physreg.
20676 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20677 auto &VA = RVLocs[i];
20678 // Copy the value out
20679 SDValue RetValue =
20680 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20681 // Glue the RetValue to the end of the call sequence
20682 Chain = RetValue.getValue(1);
20683 Glue = RetValue.getValue(2);
20684
20685 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20686 assert(VA.needsCustom());
20687 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20688 MVT::i32, Glue);
20689 Chain = RetValue2.getValue(1);
20690 Glue = RetValue2.getValue(2);
20691 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20692 RetValue2);
20693 } else
20694 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20695
20696 InVals.push_back(RetValue);
20697 }
20698
20699 return Chain;
20700}
20701
20702 bool RISCVTargetLowering::CanLowerReturn(
20703 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20704 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
20705 const Type *RetTy) const {
20706 SmallVector<CCValAssign, 16> RVLocs;
20707 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20708
20709 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20710 MVT VT = Outs[i].VT;
20711 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20712 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
20713 /*IsFixed=*/true, /*IsRet=*/true, nullptr))
20714 return false;
20715 }
20716 return true;
20717}
20718
20719 SDValue
20720 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
20721 bool IsVarArg,
20722 const SmallVectorImpl<ISD::OutputArg> &Outs,
20723 const SmallVectorImpl<SDValue> &OutVals,
20724 const SDLoc &DL, SelectionDAG &DAG) const {
20725 MachineFunction &MF = DAG.getMachineFunction();
20726 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20727
20728 // Stores the assignment of the return value to a location.
20729 SmallVector<CCValAssign, 16> RVLocs;
20730
20731 // Info about the registers and stack slot.
20732 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20733 *DAG.getContext());
20734
20735 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20736 nullptr, CC_RISCV);
20737
20738 if (CallConv == CallingConv::GHC && !RVLocs.empty())
20739 report_fatal_error("GHC functions return void only");
20740
20741 SDValue Glue;
20742 SmallVector<SDValue, 4> RetOps(1, Chain);
20743
20744 // Copy the result values into the output registers.
20745 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20746 SDValue Val = OutVals[OutIdx];
20747 CCValAssign &VA = RVLocs[i];
20748 assert(VA.isRegLoc() && "Can only return in registers!");
20749
20750 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20751 // Handle returning f64 on RV32D with a soft float ABI.
20752 assert(VA.isRegLoc() && "Expected return via registers");
20753 assert(VA.needsCustom());
20754 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20755 DAG.getVTList(MVT::i32, MVT::i32), Val);
20756 SDValue Lo = SplitF64.getValue(0);
20757 SDValue Hi = SplitF64.getValue(1);
20758 Register RegLo = VA.getLocReg();
20759 Register RegHi = RVLocs[++i].getLocReg();
20760
20761 if (STI.isRegisterReservedByUser(RegLo) ||
20762 STI.isRegisterReservedByUser(RegHi))
20763 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20764 MF.getFunction(),
20765 "Return value register required, but has been reserved."});
20766
20767 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20768 Glue = Chain.getValue(1);
20769 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20770 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20771 Glue = Chain.getValue(1);
20772 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20773 } else {
20774 // Handle a 'normal' return.
20775 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20776 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20777
20778 if (STI.isRegisterReservedByUser(VA.getLocReg()))
20779 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20780 MF.getFunction(),
20781 "Return value register required, but has been reserved."});
20782
20783 // Guarantee that all emitted copies are stuck together.
20784 Glue = Chain.getValue(1);
20785 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20786 }
20787 }
20788
20789 RetOps[0] = Chain; // Update chain.
20790
20791 // Add the glue node if we have it.
20792 if (Glue.getNode()) {
20793 RetOps.push_back(Glue);
20794 }
20795
20796 if (any_of(RVLocs,
20797 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20798 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20799
20800 unsigned RetOpc = RISCVISD::RET_GLUE;
20801 // Interrupt service routines use different return instructions.
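// A "supervisor" handler returns with sret, while "machine" (and currently
// also "user") handlers return with mret, selected via the RISCVISD return
// opcodes below.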
20802 const Function &Func = DAG.getMachineFunction().getFunction();
20803 if (Func.hasFnAttribute("interrupt")) {
20804 if (!Func.getReturnType()->isVoidTy())
20806 "Functions with the interrupt attribute must have void return type!");
20807
20809 StringRef Kind =
20810 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20811
20812 if (Kind == "supervisor")
20813 RetOpc = RISCVISD::SRET_GLUE;
20814 else
20815 RetOpc = RISCVISD::MRET_GLUE;
20816 }
20817
20818 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20819}
20820
20821void RISCVTargetLowering::validateCCReservedRegs(
20822 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20823 MachineFunction &MF) const {
20824 const Function &F = MF.getFunction();
20825 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20826
20827 if (llvm::any_of(Regs, [&STI](auto Reg) {
20828 return STI.isRegisterReservedByUser(Reg.first);
20829 }))
20830 F.getContext().diagnose(DiagnosticInfoUnsupported{
20831 F, "Argument register required, but has been reserved."});
20832}
20833
20834// Check if the result of the node is only used as a return value, as
20835// otherwise we can't perform a tail-call.
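// In other words, the only accepted use chain is
// N -> (optional BITCAST) -> CopyToReg (no glue) -> RISCVISD::RET_GLUE.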
20836 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
20837 if (N->getNumValues() != 1)
20838 return false;
20839 if (!N->hasNUsesOfValue(1, 0))
20840 return false;
20841
20842 SDNode *Copy = *N->user_begin();
20843
20844 if (Copy->getOpcode() == ISD::BITCAST) {
20845 return isUsedByReturnOnly(Copy, Chain);
20846 }
20847
20848 // TODO: Handle additional opcodes in order to support tail-calling libcalls
20849 // with soft float ABIs.
20850 if (Copy->getOpcode() != ISD::CopyToReg) {
20851 return false;
20852 }
20853
20854 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20855 // isn't safe to perform a tail call.
20856 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20857 return false;
20858
20859 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20860 bool HasRet = false;
20861 for (SDNode *Node : Copy->users()) {
20862 if (Node->getOpcode() != RISCVISD::RET_GLUE)
20863 return false;
20864 HasRet = true;
20865 }
20866 if (!HasRet)
20867 return false;
20868
20869 Chain = Copy->getOperand(0);
20870 return true;
20871}
20872
20873 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20874 return CI->isTailCall();
20875}
20876
20877const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20878#define NODE_NAME_CASE(NODE) \
20879 case RISCVISD::NODE: \
20880 return "RISCVISD::" #NODE;
20881 // clang-format off
20882 switch ((RISCVISD::NodeType)Opcode) {
20883 case RISCVISD::FIRST_NUMBER:
20884 break;
20885 NODE_NAME_CASE(RET_GLUE)
20886 NODE_NAME_CASE(SRET_GLUE)
20887 NODE_NAME_CASE(MRET_GLUE)
20888 NODE_NAME_CASE(CALL)
20889 NODE_NAME_CASE(TAIL)
20890 NODE_NAME_CASE(SELECT_CC)
20891 NODE_NAME_CASE(BR_CC)
20892 NODE_NAME_CASE(BuildGPRPair)
20893 NODE_NAME_CASE(SplitGPRPair)
20894 NODE_NAME_CASE(BuildPairF64)
20895 NODE_NAME_CASE(SplitF64)
20896 NODE_NAME_CASE(ADD_LO)
20897 NODE_NAME_CASE(HI)
20898 NODE_NAME_CASE(LLA)
20899 NODE_NAME_CASE(ADD_TPREL)
20900 NODE_NAME_CASE(MULHSU)
20901 NODE_NAME_CASE(SHL_ADD)
20902 NODE_NAME_CASE(SLLW)
20903 NODE_NAME_CASE(SRAW)
20904 NODE_NAME_CASE(SRLW)
20905 NODE_NAME_CASE(DIVW)
20906 NODE_NAME_CASE(DIVUW)
20907 NODE_NAME_CASE(REMUW)
20908 NODE_NAME_CASE(ROLW)
20909 NODE_NAME_CASE(RORW)
20910 NODE_NAME_CASE(CLZW)
20911 NODE_NAME_CASE(CTZW)
20912 NODE_NAME_CASE(ABSW)
20913 NODE_NAME_CASE(FMV_H_X)
20914 NODE_NAME_CASE(FMV_X_ANYEXTH)
20915 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20916 NODE_NAME_CASE(FMV_W_X_RV64)
20917 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20918 NODE_NAME_CASE(FCVT_X)
20919 NODE_NAME_CASE(FCVT_XU)
20920 NODE_NAME_CASE(FCVT_W_RV64)
20921 NODE_NAME_CASE(FCVT_WU_RV64)
20922 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20923 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20924 NODE_NAME_CASE(FROUND)
20925 NODE_NAME_CASE(FCLASS)
20926 NODE_NAME_CASE(FSGNJX)
20927 NODE_NAME_CASE(FMAX)
20928 NODE_NAME_CASE(FMIN)
20929 NODE_NAME_CASE(FLI)
20930 NODE_NAME_CASE(READ_COUNTER_WIDE)
20931 NODE_NAME_CASE(BREV8)
20932 NODE_NAME_CASE(ORC_B)
20933 NODE_NAME_CASE(ZIP)
20934 NODE_NAME_CASE(UNZIP)
20935 NODE_NAME_CASE(CLMUL)
20936 NODE_NAME_CASE(CLMULH)
20937 NODE_NAME_CASE(CLMULR)
20938 NODE_NAME_CASE(MOPR)
20939 NODE_NAME_CASE(MOPRR)
20940 NODE_NAME_CASE(SHA256SIG0)
20941 NODE_NAME_CASE(SHA256SIG1)
20942 NODE_NAME_CASE(SHA256SUM0)
20943 NODE_NAME_CASE(SHA256SUM1)
20944 NODE_NAME_CASE(SM4KS)
20945 NODE_NAME_CASE(SM4ED)
20946 NODE_NAME_CASE(SM3P0)
20947 NODE_NAME_CASE(SM3P1)
20948 NODE_NAME_CASE(TH_LWD)
20949 NODE_NAME_CASE(TH_LWUD)
20950 NODE_NAME_CASE(TH_LDD)
20951 NODE_NAME_CASE(TH_SWD)
20952 NODE_NAME_CASE(TH_SDD)
20953 NODE_NAME_CASE(VMV_V_V_VL)
20954 NODE_NAME_CASE(VMV_V_X_VL)
20955 NODE_NAME_CASE(VFMV_V_F_VL)
20956 NODE_NAME_CASE(VMV_X_S)
20957 NODE_NAME_CASE(VMV_S_X_VL)
20958 NODE_NAME_CASE(VFMV_S_F_VL)
20959 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20960 NODE_NAME_CASE(READ_VLENB)
20961 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20962 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20963 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20964 NODE_NAME_CASE(VSLIDEUP_VL)
20965 NODE_NAME_CASE(VSLIDE1UP_VL)
20966 NODE_NAME_CASE(VSLIDEDOWN_VL)
20967 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20968 NODE_NAME_CASE(VFSLIDE1UP_VL)
20969 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20970 NODE_NAME_CASE(VID_VL)
20971 NODE_NAME_CASE(VFNCVT_ROD_VL)
20972 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20973 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20974 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20975 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20976 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20977 NODE_NAME_CASE(VECREDUCE_AND_VL)
20978 NODE_NAME_CASE(VECREDUCE_OR_VL)
20979 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20980 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20981 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20982 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20983 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20984 NODE_NAME_CASE(ADD_VL)
20985 NODE_NAME_CASE(AND_VL)
20986 NODE_NAME_CASE(MUL_VL)
20987 NODE_NAME_CASE(OR_VL)
20988 NODE_NAME_CASE(SDIV_VL)
20989 NODE_NAME_CASE(SHL_VL)
20990 NODE_NAME_CASE(SREM_VL)
20991 NODE_NAME_CASE(SRA_VL)
20992 NODE_NAME_CASE(SRL_VL)