1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/IntrinsicsRISCV.h"
46#include "llvm/Support/Debug.h"
52#include <optional>
53
54using namespace llvm;
55
56#define DEBUG_TYPE "riscv-lower"
57
58STATISTIC(NumTailCalls, "Number of tail calls");
59
61 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
62 cl::desc("Give the maximum size (in number of nodes) of the web of "
63 "instructions that we will consider for VW expansion"),
64 cl::init(18));
65
66static cl::opt<bool>
67 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
68 cl::desc("Allow the formation of VW_W operations (e.g., "
69 "VWADD_W) with splat constants"),
70 cl::init(false));
71
73 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
74 cl::desc("Set the minimum number of repetitions of a divisor to allow "
75 "transformation to multiplications by the reciprocal"),
76 cl::init(2));
77
78static cl::opt<int>
80 cl::desc("Give the maximum number of instructions that we will "
81 "use for creating a floating-point immediate value"),
82 cl::init(3));
83
84static cl::opt<bool>
85 ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
86 cl::desc("Swap add and addi in cases where the add may "
87 "be combined with a shift"),
88 cl::init(true));
89
90// TODO: Support more ops
91static const unsigned ZvfbfaVPOps[] = {ISD::VP_FNEG, ISD::VP_FABS,
92 ISD::VP_FCOPYSIGN};
93static const unsigned ZvfbfaOps[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};
94
96 const RISCVSubtarget &STI)
97 : TargetLowering(TM), Subtarget(STI) {
98
99 RISCVABI::ABI ABI = Subtarget.getTargetABI();
100 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
101
102 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
103 !Subtarget.hasStdExtF()) {
104 errs() << "Hard-float 'f' ABI can't be used for a target that "
105 "doesn't support the F instruction set extension (ignoring "
106 "target-abi)\n";
107 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
108 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
109 !Subtarget.hasStdExtD()) {
110 errs() << "Hard-float 'd' ABI can't be used for a target that "
111 "doesn't support the D instruction set extension (ignoring "
112 "target-abi)\n";
113 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
114 }
115
116 switch (ABI) {
117 default:
118 reportFatalUsageError("Don't know how to lower this ABI");
127 break;
128 }
129
130 MVT XLenVT = Subtarget.getXLenVT();
131
132 // Set up the register classes.
133 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
134
135 if (Subtarget.hasStdExtZfhmin())
136 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
137 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
138 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
139 if (Subtarget.hasStdExtF())
140 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
141 if (Subtarget.hasStdExtD())
142 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
143 if (Subtarget.hasStdExtZhinxmin())
144 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
145 if (Subtarget.hasStdExtZfinx())
146 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
147 if (Subtarget.hasStdExtZdinx()) {
148 if (Subtarget.is64Bit())
149 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
150 else
151 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
152 }
153
154 static const MVT::SimpleValueType BoolVecVTs[] = {
155 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
156 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
157 static const MVT::SimpleValueType IntVecVTs[] = {
158 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
159 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
160 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
161 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
162 MVT::nxv4i64, MVT::nxv8i64};
163 static const MVT::SimpleValueType F16VecVTs[] = {
164 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
165 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
166 static const MVT::SimpleValueType BF16VecVTs[] = {
167 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
168 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
169 static const MVT::SimpleValueType F32VecVTs[] = {
170 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
171 static const MVT::SimpleValueType F64VecVTs[] = {
172 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
173 static const MVT::SimpleValueType VecTupleVTs[] = {
174 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
175 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
176 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
177 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
178 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
179 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
180 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
181 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
182 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
183 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
184 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
185
186 if (Subtarget.hasVInstructions()) {
187 auto addRegClassForRVV = [this](MVT VT) {
188 // Disable the smallest fractional LMUL types if ELEN is less than
189 // RVVBitsPerBlock.
190 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
191 if (VT.getVectorMinNumElements() < MinElts)
192 return;
193
194 unsigned Size = VT.getSizeInBits().getKnownMinValue();
195 const TargetRegisterClass *RC;
197 RC = &RISCV::VRRegClass;
198 else if (Size == 2 * RISCV::RVVBitsPerBlock)
199 RC = &RISCV::VRM2RegClass;
200 else if (Size == 4 * RISCV::RVVBitsPerBlock)
201 RC = &RISCV::VRM4RegClass;
202 else if (Size == 8 * RISCV::RVVBitsPerBlock)
203 RC = &RISCV::VRM8RegClass;
204 else
205 llvm_unreachable("Unexpected size");
206
207 addRegisterClass(VT, RC);
208 };
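// For example, with Zve32x (ELEN=32) the lambda above computes
// MinElts = 64/32 = 2, so the single-element types nxv1i8/nxv1i16/nxv1i32
// are skipped, while a type such as nxv16i8 (128 known-min bits, i.e.
// 2 * RVVBitsPerBlock) is assigned to the LMUL=2 class VRM2.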
209
210 for (MVT VT : BoolVecVTs)
211 addRegClassForRVV(VT);
212 for (MVT VT : IntVecVTs) {
213 if (VT.getVectorElementType() == MVT::i64 &&
214 !Subtarget.hasVInstructionsI64())
215 continue;
216 addRegClassForRVV(VT);
217 }
218
219 if (Subtarget.hasVInstructionsF16Minimal() ||
220 Subtarget.hasVendorXAndesVPackFPH())
221 for (MVT VT : F16VecVTs)
222 addRegClassForRVV(VT);
223
224 if (Subtarget.hasVInstructionsBF16Minimal() ||
225 Subtarget.hasVendorXAndesVBFHCvt())
226 for (MVT VT : BF16VecVTs)
227 addRegClassForRVV(VT);
228
229 if (Subtarget.hasVInstructionsF32())
230 for (MVT VT : F32VecVTs)
231 addRegClassForRVV(VT);
232
233 if (Subtarget.hasVInstructionsF64())
234 for (MVT VT : F64VecVTs)
235 addRegClassForRVV(VT);
236
237 if (Subtarget.useRVVForFixedLengthVectors()) {
238 auto addRegClassForFixedVectors = [this](MVT VT) {
239 MVT ContainerVT = getContainerForFixedLengthVector(VT);
240 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
241 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
242 addRegisterClass(VT, TRI.getRegClass(RCID));
243 };
245 if (useRVVForFixedLengthVectorVT(VT))
246 addRegClassForFixedVectors(VT);
247
249 if (useRVVForFixedLengthVectorVT(VT))
250 addRegClassForFixedVectors(VT);
251 }
252
253 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
254 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
255 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
256 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
257 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
258 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
259 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
260 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
261 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
262 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
263 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
264 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
265 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
266 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
267 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
268 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
269 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
270 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
271 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
272 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
273 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
274 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
275 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
276 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
277 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
278 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
279 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
280 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
281 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
282 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
283 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
284 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
285 }
286
287 // Compute derived properties from the register classes.
289
291
293 MVT::i1, Promote);
294 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
296 MVT::i1, Promote);
297
298 // TODO: add all necessary setOperationAction calls.
299 setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Custom);
300
301 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
302 setOperationAction(ISD::BR_CC, XLenVT, Expand);
303 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
305
310 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
313 }
314
315 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
316
317 setOperationAction(ISD::VASTART, MVT::Other, Custom);
318 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
319
320 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
321 !Subtarget.hasVendorXAndesPerf())
323
325
326 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasStdExtP() &&
327 !Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
328 !Subtarget.hasVendorXAndesPerf() &&
329 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
330 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
331
332 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
333 setOperationAction(ISD::LOAD, MVT::i64, Custom);
334 setOperationAction(ISD::STORE, MVT::i64, Custom);
335 }
336
337 if (Subtarget.is64Bit()) {
339
340 setOperationAction(ISD::LOAD, MVT::i32, Custom);
342 MVT::i32, Custom);
344 if (!Subtarget.hasStdExtZbb())
347 Custom);
349 }
350 if (!Subtarget.hasStdExtZmmul()) {
352 } else if (Subtarget.is64Bit()) {
355 } else {
357 }
358
359 if (!Subtarget.hasStdExtM()) {
361 Expand);
362 } else if (Subtarget.is64Bit()) {
364 {MVT::i8, MVT::i16, MVT::i32}, Custom);
365 }
366
369 Expand);
370
372 Custom);
373
374 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
375 if (Subtarget.is64Bit())
377 } else if (Subtarget.hasVendorXTHeadBb()) {
378 if (Subtarget.is64Bit())
381 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
383 } else {
385 }
386
388 Subtarget.hasREV8Like() ? Legal : Expand);
389
390 if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
391 !Subtarget.is64Bit()) {
393 } else {
394 // Zbkb can use rev8+brev8 to implement bitreverse.
396 Subtarget.hasStdExtZbkb() ? Custom : Expand);
397 if (Subtarget.hasStdExtZbkb())
399 }
400
401 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP() ||
402 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
404 Legal);
405 }
406
407 if (Subtarget.hasCTZLike()) {
408 if (Subtarget.is64Bit())
410 } else {
412 // If we have CLZW, but not CTZW, custom promote i32.
413 if (Subtarget.hasStdExtP() && Subtarget.is64Bit())
415 }
416
417 if (!Subtarget.hasCPOPLike()) {
418 // TODO: These should be set to LibCall, but this currently breaks
419 // the Linux kernel build. See #101786. Lacks i128 tests, too.
420 if (Subtarget.is64Bit())
422 else
425 }
426
427 if (Subtarget.hasCLZLike()) {
428 // We need the custom lowering to make sure that the resulting sequence
429 // for the 32-bit case is efficient on 64-bit targets.
430 // Use default promotion for i32 without Zbb.
431 if (Subtarget.is64Bit() &&
432 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP()))
434 } else {
436 }
437
438 if (Subtarget.hasStdExtP() ||
439 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
441 if (Subtarget.is64Bit())
443 } else if (Subtarget.hasShortForwardBranchOpt()) {
444 // We can use PseudoCCSUB to implement ABS.
446 } else if (Subtarget.is64Bit()) {
448 }
449
450 if (!Subtarget.useMIPSCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov())
452
453 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
460 }
461
462 static const unsigned FPLegalNodeTypes[] = {
463 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM,
464 ISD::FMAXIMUMNUM, ISD::LRINT, ISD::LLRINT,
465 ISD::LROUND, ISD::LLROUND, ISD::STRICT_LRINT,
470
471 static const ISD::CondCode FPCCToExpand[] = {
475
476 static const unsigned FPOpToExpand[] = {
477 ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
478 ISD::FREM};
479
480 static const unsigned FPRndMode[] = {
481 ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
482 ISD::FROUNDEVEN};
483
484 static const unsigned ZfhminZfbfminPromoteOps[] = {
485 ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM,
486 ISD::FMINIMUMNUM, ISD::FADD, ISD::FSUB,
491 ISD::SETCC, ISD::FCEIL, ISD::FFLOOR,
492 ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
493 ISD::FROUNDEVEN, ISD::FCANONICALIZE};
494
495 if (Subtarget.hasStdExtZfbfmin()) {
496 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
500 setOperationAction(ISD::BR_CC, MVT::bf16, Expand);
501 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
503 setOperationAction(ISD::FABS, MVT::bf16, Custom);
504 setOperationAction(ISD::FNEG, MVT::bf16, Custom);
508 }
509
510 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
511 if (Subtarget.hasStdExtZfhOrZhinx()) {
512 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
513 setOperationAction(FPRndMode, MVT::f16,
514 Subtarget.hasStdExtZfa() ? Legal : Custom);
516 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16,
517 Subtarget.hasStdExtZfa() ? Legal : Custom);
518 if (Subtarget.hasStdExtZfa())
520 } else {
521 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
522 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Promote);
523 for (auto Op : {ISD::LROUND, ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
526 setOperationAction(Op, MVT::f16, Custom);
527 setOperationAction(ISD::FABS, MVT::f16, Custom);
528 setOperationAction(ISD::FNEG, MVT::f16, Custom);
532 }
533
534 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
535
538 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
541 setOperationAction(ISD::BR_CC, MVT::f16, Expand);
542
544 ISD::FNEARBYINT, MVT::f16,
545 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
546 setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
547 ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
548 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
549 ISD::FLOG10, ISD::FLDEXP, ISD::FFREXP, ISD::FMODF},
550 MVT::f16, Promote);
551
552 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
553 // complete support for all operations in LegalizeDAG.
558 MVT::f16, Promote);
559
560 // We need to custom promote this.
561 if (Subtarget.is64Bit())
562 setOperationAction(ISD::FPOWI, MVT::i32, Custom);
563 }
564
565 if (Subtarget.hasStdExtFOrZfinx()) {
566 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
567 setOperationAction(FPRndMode, MVT::f32,
568 Subtarget.hasStdExtZfa() ? Legal : Custom);
569 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
572 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
573 setOperationAction(FPOpToExpand, MVT::f32, Expand);
574 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
575 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
576 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
577 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
579 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
580 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
581 Subtarget.isSoftFPABI() ? LibCall : Custom);
582 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
583 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
584 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Custom);
585 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Custom);
586
587 if (Subtarget.hasStdExtZfa()) {
589 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
590 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Legal);
591 } else {
592 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
593 }
594 }
595
596 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
597 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
598
599 if (Subtarget.hasStdExtDOrZdinx()) {
600 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
601
602 if (!Subtarget.is64Bit())
603 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
604
605 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
606 !Subtarget.is64Bit()) {
607 setOperationAction(ISD::LOAD, MVT::f64, Custom);
608 setOperationAction(ISD::STORE, MVT::f64, Custom);
609 }
610
611 if (Subtarget.hasStdExtZfa()) {
613 setOperationAction(FPRndMode, MVT::f64, Legal);
614 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
615 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Legal);
616 } else {
617 if (Subtarget.is64Bit())
618 setOperationAction(FPRndMode, MVT::f64, Custom);
619
620 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
621 }
622
625 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
628 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
629 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
630 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
631 setOperationAction(FPOpToExpand, MVT::f64, Expand);
632 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
633 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
634 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
635 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
637 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
638 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
639 Subtarget.isSoftFPABI() ? LibCall : Custom);
640 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
641 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
642 setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Custom);
643 setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
644 }
645
646 if (Subtarget.is64Bit()) {
649 MVT::i32, Custom);
650 setOperationAction(ISD::LROUND, MVT::i32, Custom);
651 }
652
653 if (Subtarget.hasStdExtFOrZfinx()) {
655 Custom);
656
657 // f16/bf16 require custom handling.
659 Custom);
661 Custom);
662
664 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
665 setOperationAction(ISD::GET_FPENV, XLenVT, Custom);
666 setOperationAction(ISD::SET_FPENV, XLenVT, Custom);
667 setOperationAction(ISD::RESET_FPENV, MVT::Other, Custom);
668 setOperationAction(ISD::GET_FPMODE, XLenVT, Custom);
669 setOperationAction(ISD::SET_FPMODE, XLenVT, Custom);
670 setOperationAction(ISD::RESET_FPMODE, MVT::Other, Custom);
671 }
672
675 XLenVT, Custom);
676
678
679 if (Subtarget.is64Bit())
681
682 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
683 // Unfortunately this can't be determined just from the ISA naming string.
684 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
685 Subtarget.is64Bit() ? Legal : Custom);
686 setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64,
687 Subtarget.is64Bit() ? Legal : Custom);
688
689 if (Subtarget.is64Bit()) {
690 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
691 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
692 }
693
694 setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
696 if (Subtarget.is64Bit())
698
699 if (Subtarget.hasVendorXMIPSCBOP())
700 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
701 else if (Subtarget.hasStdExtZicbop())
702 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
703
704 if (Subtarget.hasStdExtZalrsc()) {
705 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
706 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
708 else
710 } else if (Subtarget.hasForcedAtomics()) {
711 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
712 } else {
714 }
715
716 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
717
719
720 if (getTargetMachine().getTargetTriple().isOSLinux()) {
721 // Custom lowering of llvm.clear_cache.
723 }
724
725 if (Subtarget.hasVInstructions()) {
727
728 setOperationAction(ISD::VSCALE, XLenVT, Custom);
729
730 // RVV intrinsics may have illegal operands.
731 // We also need to custom legalize vmv.x.s.
734 {MVT::i8, MVT::i16}, Custom);
735 if (Subtarget.is64Bit())
737 MVT::i32, Custom);
738 else
740 MVT::i64, Custom);
741
743 MVT::Other, Custom);
744
745 static const unsigned IntegerVPOps[] = {
746 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
747 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
748 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
749 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
750 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
751 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
752 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
753 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
754 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
755 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
756 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
757 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
758 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
759 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
760 ISD::EXPERIMENTAL_VP_SPLAT};
761
762 static const unsigned FloatingPointVPOps[] = {
763 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
764 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
765 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
766 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
767 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
768 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
769 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
770 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
771 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
772 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
773 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
774 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
775 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
776
777 static const unsigned IntegerVecReduceOps[] = {
778 ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
779 ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
780 ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN};
781
782 static const unsigned FloatingPointVecReduceOps[] = {
783 ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_FMIN,
784 ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMINIMUM, ISD::VECREDUCE_FMAXIMUM};
785
786 static const unsigned FloatingPointLibCallOps[] = {
787 ISD::FREM, ISD::FPOW, ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
788 ISD::FEXP2, ISD::FEXP10, ISD::FLOG, ISD::FLOG2, ISD::FLOG10};
789
790 if (!Subtarget.is64Bit()) {
791 // We must custom-lower certain vXi64 operations on RV32 due to the vector
792 // element type being illegal.
794 MVT::i64, Custom);
795
796 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
797
798 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
799 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
800 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
801 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
802 MVT::i64, Custom);
803 }
804
805 for (MVT VT : BoolVecVTs) {
806 if (!isTypeLegal(VT))
807 continue;
808
810
811 // Mask VTs are custom-expanded into a series of standard nodes
815 VT, Custom);
816
818 Custom);
819
821 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
822 Expand);
823 setOperationAction(ISD::VP_MERGE, VT, Custom);
824
825 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
826 Custom);
827
828 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
829
831 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
832 Custom);
833
835 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
836 Custom);
837
838 // RVV has native int->float & float->int conversions where the
839 // element type sizes are within one power-of-two of each other. Any
840 // wider distances between type sizes have to be lowered as sequences
841 // which progressively narrow the gap in stages.
846 VT, Custom);
848 Custom);
849
850 // Expand all extending loads to types larger than this, and truncating
851 // stores from types larger than this.
853 setTruncStoreAction(VT, OtherVT, Expand);
855 OtherVT, Expand);
856 }
857
858 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
859 ISD::VP_TRUNCATE, ISD::VP_SETCC},
860 VT, Custom);
861
864
866
867 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
868 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
869 setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
870
873 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
874 }
875
876 for (MVT VT : IntVecVTs) {
877 if (!isTypeLegal(VT))
878 continue;
879
882
883 // Vectors implement MULHS/MULHU.
885
886 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
887 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
889
891 Legal);
892
894
895 // Custom-lower extensions and truncations from/to mask types.
897 VT, Custom);
898
899 // RVV has native int->float & float->int conversions where the
900 // element type sizes are within one power-of-two of each other. Any
901 // wider distances between type sizes have to be lowered as sequences
902 // which progressively narrow the gap in stages.
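// For example, an i8 -> f64 conversion cannot be done in a single step; it
// is emitted as a staged sequence (e.g. widening the integer source first),
// since each hardware conversion only moves one power of two in element
// width.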
907 VT, Custom);
909 Custom);
913 VT, Legal);
914
915 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
916 // nodes which truncate by one power of two at a time.
919 Custom);
920
921 // Custom-lower insert/extract operations to simplify patterns.
923 Custom);
924
925 // Custom-lower reduction operations to set up the corresponding custom
926 // nodes' operands.
927 setOperationAction(IntegerVecReduceOps, VT, Custom);
928
929 setOperationAction(IntegerVPOps, VT, Custom);
930
931 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
932
933 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
934 VT, Custom);
935
937 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
938 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
939 VT, Custom);
940 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
941
944 VT, Custom);
945
948
950
952 setTruncStoreAction(VT, OtherVT, Expand);
954 OtherVT, Expand);
955 }
956
959
960 // Splice
962
963 if (Subtarget.hasStdExtZvkb()) {
965 setOperationAction(ISD::VP_BSWAP, VT, Custom);
966 } else {
967 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
969 }
970
971 if (Subtarget.hasStdExtZvbb()) {
973 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
974 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
975 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
976 VT, Custom);
977 } else {
978 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
980 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
981 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
982 VT, Expand);
983
984 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
985 // in the range of f32.
986 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
987 if (isTypeLegal(FloatVT)) {
989 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
990 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
991 VT, Custom);
992 }
993 }
994
996 }
997
998 for (MVT VT : VecTupleVTs) {
999 if (!isTypeLegal(VT))
1000 continue;
1001
1002 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1003 }
1004
1005 // Expand various CCs to best match the RVV ISA, which natively supports UNE
1006 // but no other unordered comparisons, and supports all ordered comparisons
1007 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
1008 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
1009 // and we pattern-match those back to the "original", swapping operands once
1010 // more. This way we catch both operations and both "vf" and "fv" forms with
1011 // fewer patterns.
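// For example, (setogt x, y) is expanded to the swapped (setolt y, x),
// which maps onto vmflt.vv; when one operand is a scalar, the patterns swap
// the operands back again so the vmfgt.vf form can be used without
// splatting the scalar.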
1012 static const ISD::CondCode VFPCCToExpand[] = {
1016 };
1017
1018 // TODO: support more ops.
1019 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1020 ISD::FMINNUM,
1021 ISD::FMAXNUM,
1022 ISD::FMINIMUMNUM,
1023 ISD::FMAXIMUMNUM,
1024 ISD::FADD,
1025 ISD::FSUB,
1026 ISD::FMUL,
1027 ISD::FMA,
1028 ISD::FDIV,
1029 ISD::FSQRT,
1030 ISD::FCEIL,
1031 ISD::FTRUNC,
1032 ISD::FFLOOR,
1033 ISD::FROUND,
1034 ISD::FROUNDEVEN,
1035 ISD::FRINT,
1036 ISD::FNEARBYINT,
1038 ISD::SETCC,
1039 ISD::FMAXIMUM,
1040 ISD::FMINIMUM,
1047 ISD::VECREDUCE_FMIN,
1048 ISD::VECREDUCE_FMAX,
1049 ISD::VECREDUCE_FMINIMUM,
1050 ISD::VECREDUCE_FMAXIMUM};
1051
1052 // TODO: support more vp ops.
1053 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1054 ISD::VP_FADD,
1055 ISD::VP_FSUB,
1056 ISD::VP_FMUL,
1057 ISD::VP_FDIV,
1058 ISD::VP_FMA,
1059 ISD::VP_REDUCE_FMIN,
1060 ISD::VP_REDUCE_FMAX,
1061 ISD::VP_SQRT,
1062 ISD::VP_FMINNUM,
1063 ISD::VP_FMAXNUM,
1064 ISD::VP_FCEIL,
1065 ISD::VP_FFLOOR,
1066 ISD::VP_FROUND,
1067 ISD::VP_FROUNDEVEN,
1068 ISD::VP_FROUNDTOZERO,
1069 ISD::VP_FRINT,
1070 ISD::VP_FNEARBYINT,
1071 ISD::VP_SETCC,
1072 ISD::VP_FMINIMUM,
1073 ISD::VP_FMAXIMUM,
1074 ISD::VP_REDUCE_FMINIMUM,
1075 ISD::VP_REDUCE_FMAXIMUM};
1076
1077 // Sets common operation actions on RVV floating-point vector types.
1078 const auto SetCommonVFPActions = [&](MVT VT) {
1080 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1081 // sizes are within one power-of-two of each other. Therefore conversions
1082 // between vXf16 and vXf64 must be lowered as sequences which convert via
1083 // vXf32.
1084 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1085 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1086 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1087 // Custom-lower insert/extract operations to simplify patterns.
1089 Custom);
1090 // Expand various condition codes (explained above).
1091 setCondCodeAction(VFPCCToExpand, VT, Expand);
1092
1094 {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM}, VT,
1095 Legal);
1096 setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom);
1097
1098 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1099 ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
1101 VT, Custom);
1102
1103 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1104
1105 // Expand FP operations that need libcalls.
1106 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1107
1109
1110 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1111
1112 setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
1113 VT, Custom);
1114
1116 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1117 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1118 VT, Custom);
1119 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1120
1123
1126 VT, Custom);
1127
1130
1132 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1133 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1134
1135 setOperationAction(FloatingPointVPOps, VT, Custom);
1136
1138 Custom);
1141 VT, Legal);
1146 VT, Custom);
1147
1149 };
1150
1151 // Sets common extload/truncstore actions on RVV floating-point vector
1152 // types.
1153 const auto SetCommonVFPExtLoadTruncStoreActions =
1154 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1155 for (auto SmallVT : SmallerVTs) {
1156 setTruncStoreAction(VT, SmallVT, Expand);
1157 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1158 }
1159 };
1160
1161 // Sets common actions for f16 and bf16 when there's only
1162 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1163 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1164 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1166 Custom);
1167 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1168 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1169 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1170 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1171 Custom);
1173 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1179 VT, Custom);
1180 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1181 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1182 MVT EltVT = VT.getVectorElementType();
1183 if (isTypeLegal(EltVT))
1184 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1186 VT, Custom);
1187 else
1188 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1189 EltVT, Custom);
1190 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1191 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1192 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1193 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1194 ISD::VP_SCATTER},
1195 VT, Custom);
1196 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1197
1198 setOperationAction(ISD::FNEG, VT, Expand);
1199 setOperationAction(ISD::FABS, VT, Expand);
1201
1202 // Expand FP operations that need libcalls.
1203 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1204
1205 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1206 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1207 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1208 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1209 } else {
1210 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1211 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1212 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1213 }
1214 };
1215
1216 // Sets common actions for zvfbfa; some of the instructions are supported
1217 // natively, so we don't need to promote them.
1218 const auto SetZvfbfaActions = [&](MVT VT) {
1219 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1221 Custom);
1222 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1223 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1224 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1225 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1226 Custom);
1228 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1234 VT, Custom);
1235 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1236 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1237
1240
1241 MVT EltVT = VT.getVectorElementType();
1242 if (isTypeLegal(EltVT))
1243 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1245 VT, Custom);
1246 else
1247 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1248 EltVT, Custom);
1249 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1250 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1251 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1252 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1253 ISD::VP_SCATTER},
1254 VT, Custom);
1255 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1256
1257 // Expand FP operations that need libcalls.
1258 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1259
1260 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1261 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1262 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1263 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1264 } else {
1265 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1266 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1267 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1268 }
1269 };
1270
1271 if (Subtarget.hasVInstructionsF16()) {
1272 for (MVT VT : F16VecVTs) {
1273 if (!isTypeLegal(VT))
1274 continue;
1275 SetCommonVFPActions(VT);
1276 }
1277 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1278 for (MVT VT : F16VecVTs) {
1279 if (!isTypeLegal(VT))
1280 continue;
1281 SetCommonPromoteToF32Actions(VT);
1282 }
1283 }
1284
1285 if (Subtarget.hasVInstructionsBF16()) {
1286 for (MVT VT : BF16VecVTs) {
1287 if (!isTypeLegal(VT))
1288 continue;
1289 SetZvfbfaActions(VT);
1290 }
1291 } else if (Subtarget.hasVInstructionsBF16Minimal()) {
1292 for (MVT VT : BF16VecVTs) {
1293 if (!isTypeLegal(VT))
1294 continue;
1295 SetCommonPromoteToF32Actions(VT);
1296 }
1297 }
1298
1299 if (Subtarget.hasVInstructionsF32()) {
1300 for (MVT VT : F32VecVTs) {
1301 if (!isTypeLegal(VT))
1302 continue;
1303 SetCommonVFPActions(VT);
1304 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1305 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1306 }
1307 }
1308
1309 if (Subtarget.hasVInstructionsF64()) {
1310 for (MVT VT : F64VecVTs) {
1311 if (!isTypeLegal(VT))
1312 continue;
1313 SetCommonVFPActions(VT);
1314 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1315 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1316 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1317 }
1318 }
1319
1320 if (Subtarget.useRVVForFixedLengthVectors()) {
1322 if (!useRVVForFixedLengthVectorVT(VT))
1323 continue;
1324
1325 // By default everything must be expanded.
1326 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1329 setTruncStoreAction(VT, OtherVT, Expand);
1331 OtherVT, Expand);
1332 }
1333
1334 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1335 // expansion to a build_vector of 0s.
1337
1338 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1340 Custom);
1341
1344 Custom);
1345
1347 VT, Custom);
1348
1350 VT, Custom);
1351
1353
1354 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1355
1357
1359
1362 Custom);
1363
1364 setOperationAction(ISD::BITCAST, VT, Custom);
1365
1367 {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
1368 Custom);
1369
1371 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1372 Custom);
1373
1375 {
1384 },
1385 VT, Custom);
1387 Custom);
1388
1390
1391 // Operations below are different between masks and other vectors.
1392 if (VT.getVectorElementType() == MVT::i1) {
1393 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1394 ISD::OR, ISD::XOR},
1395 VT, Custom);
1396
1397 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1398 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1399 VT, Custom);
1400
1401 setOperationAction(ISD::VP_MERGE, VT, Custom);
1402
1403 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1404 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1405 continue;
1406 }
1407
1408 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1409 // it before type legalization for i64 vectors on RV32. It will then be
1410 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1411 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1412 // improvements first.
1413 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1416
1417 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1419 }
1420
1422 {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
1423
1424 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1425 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1426 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1427 ISD::VP_SCATTER},
1428 VT, Custom);
1429 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1430
1434 VT, Custom);
1435
1438
1440
1441 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1442 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1444
1448 VT, Custom);
1449
1451
1454
1455 // Custom-lower reduction operations to set up the corresponding custom
1456 // nodes' operands.
1457 setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
1458 ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
1459 ISD::VECREDUCE_UMIN},
1460 VT, Custom);
1461
1462 setOperationAction(IntegerVPOps, VT, Custom);
1463
1464 if (Subtarget.hasStdExtZvkb())
1466
1467 if (Subtarget.hasStdExtZvbb()) {
1470 VT, Custom);
1471 } else {
1472 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1473 // in the range of f32.
1474 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1475 if (isTypeLegal(FloatVT))
1478 Custom);
1479 }
1480
1482 }
1483
1485 // There are no extending loads or truncating stores.
1486 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1487 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1488 setTruncStoreAction(VT, InnerVT, Expand);
1489 }
1490
1491 if (!useRVVForFixedLengthVectorVT(VT))
1492 continue;
1493
1494 // By default everything must be expanded.
1495 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1497
1498 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1499 // expansion to a build_vector of 0s.
1501
1506 VT, Custom);
1507 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1508 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1509
1511 VT, Custom);
1512
1513 setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
1514 ISD::MGATHER, ISD::MSCATTER},
1515 VT, Custom);
1516 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1517 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1518 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1519 VT, Custom);
1520 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1521
1522 setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
1524 Custom);
1525
1526 if (VT.getVectorElementType() == MVT::f16 &&
1527 !Subtarget.hasVInstructionsF16()) {
1528 setOperationAction(ISD::BITCAST, VT, Custom);
1529 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1531 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1532 Custom);
1533 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1534 Custom);
1535 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1536 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1537 if (Subtarget.hasStdExtZfhmin()) {
1539 } else {
1540 // We need to custom legalize f16 build vectors if Zfhmin isn't
1541 // available.
1543 }
1544 setOperationAction(ISD::FNEG, VT, Expand);
1545 setOperationAction(ISD::FABS, VT, Expand);
1547 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1548 // Don't promote f16 vector operations to f32 if the f32 vector type is
1549 // not legal.
1550 // TODO: could split the f16 vector into two vectors and do promotion.
1551 if (!isTypeLegal(F32VecVT))
1552 continue;
1553 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1554 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1555 continue;
1556 }
1557
1558 if (VT.getVectorElementType() == MVT::bf16) {
1559 setOperationAction(ISD::BITCAST, VT, Custom);
1560 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1561 setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
1562 setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
1563 if (Subtarget.hasStdExtZfbfmin()) {
1565 } else {
1566 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1567 // available.
1569 }
1570 if (Subtarget.hasStdExtZvfbfa()) {
1573 }
1575 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1576 Custom);
1577 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1578 // Don't promote bf16 vector operations to f32 if the f32 vector type is
1579 // not legal.
1580 // TODO: could split the bf16 vector into two vectors and do promotion.
1581 if (!isTypeLegal(F32VecVT))
1582 continue;
1583 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1584 // TODO: Promote VP ops to fp32.
1585 continue;
1586 }
1587
1589 Custom);
1590
1592 ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
1593 ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
1594 ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::IS_FPCLASS,
1595 ISD::FMAXIMUM, ISD::FMINIMUM},
1596 VT, Custom);
1597
1598 setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
1599 ISD::FROUNDEVEN, ISD::FRINT, ISD::LRINT,
1600 ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
1601 ISD::FNEARBYINT},
1602 VT, Custom);
1603
1604 setCondCodeAction(VFPCCToExpand, VT, Expand);
1605
1608
1609 setOperationAction(ISD::BITCAST, VT, Custom);
1610
1611 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1612
1613 setOperationAction(FloatingPointVPOps, VT, Custom);
1614
1621 VT, Custom);
1622 }
1623
1624 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1625 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1626 if (Subtarget.is64Bit())
1627 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1628 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1629 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
1630 if (Subtarget.hasStdExtZfbfmin())
1631 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
1632 if (Subtarget.hasStdExtFOrZfinx())
1633 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
1634 if (Subtarget.hasStdExtDOrZdinx())
1635 setOperationAction(ISD::BITCAST, MVT::f64, Custom);
1636 }
1637 }
1638
1639 if (Subtarget.hasStdExtZaamo())
1640 setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
1641
1642 if (Subtarget.hasForcedAtomics()) {
1643 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
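// For example, an i32 "atomicrmw add" is then lowered to a call to
// __sync_fetch_and_add_4 rather than to an inline atomic sequence.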
1645 {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
1646 ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
1647 ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
1648 ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
1649 XLenVT, LibCall);
1650 }
1651
1652 if (Subtarget.hasVendorXTHeadMemIdx()) {
1653 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1654 setIndexedLoadAction(im, MVT::i8, Legal);
1655 setIndexedStoreAction(im, MVT::i8, Legal);
1656 setIndexedLoadAction(im, MVT::i16, Legal);
1657 setIndexedStoreAction(im, MVT::i16, Legal);
1658 setIndexedLoadAction(im, MVT::i32, Legal);
1659 setIndexedStoreAction(im, MVT::i32, Legal);
1660
1661 if (Subtarget.is64Bit()) {
1662 setIndexedLoadAction(im, MVT::i64, Legal);
1663 setIndexedStoreAction(im, MVT::i64, Legal);
1664 }
1665 }
1666 }
1667
1668 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1672
1676 }
1677
1678 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1679 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1680 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1681 ISD::PARTIAL_REDUCE_UMLA,
1682 ISD::PARTIAL_REDUCE_SUMLA};
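// Each i32 accumulator element consumes four i8 inputs (a 4-element dot
// product), so the source type is always 4x wider than the accumulator,
// e.g. nxv4i32 accumulates from nxv16i8.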
1683 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1684 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1685 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1686 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1687 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
1688
1689 if (Subtarget.useRVVForFixedLengthVectors()) {
1691 if (VT.getVectorElementType() != MVT::i32 ||
1692 !useRVVForFixedLengthVectorVT(VT))
1693 continue;
1694 ElementCount EC = VT.getVectorElementCount();
1695 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1696 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1697 }
1698 }
1699 }
1700
1701 // Customize load and store operations for bf16 if Zfh isn't enabled.
1702 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1703 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1704 setOperationAction(ISD::STORE, MVT::bf16, Custom);
1705 }
1706
1707 // Function alignments.
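// With the compressed (Zca) encodings available, instructions can be 2
// bytes wide, so 2-byte function alignment suffices; otherwise every
// instruction is 4 bytes.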
1708 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1709 setMinFunctionAlignment(FunctionAlignment);
1710 // Set preferred alignments.
1711 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1712 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1713
1719
1720 if (Subtarget.hasStdExtFOrZfinx())
1721 setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM, ISD::FMUL});
1722
1723 if (Subtarget.hasStdExtZbb())
1725
1726 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1727 Subtarget.hasVInstructions())
1729
1730 if (Subtarget.hasStdExtZbkb())
1732
1733 if (Subtarget.hasStdExtFOrZfinx())
1736 if (Subtarget.hasVInstructions())
1738 {ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
1739 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1740 ISD::SRL, ISD::SHL, ISD::STORE,
1742 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1746 ISD::VSELECT, ISD::VECREDUCE_ADD});
1747
1748 if (Subtarget.hasVendorXTHeadMemPair())
1749 setTargetDAGCombine({ISD::LOAD, ISD::STORE});
1750 if (Subtarget.useRVVForFixedLengthVectors())
1751 setTargetDAGCombine(ISD::BITCAST);
1752
1753 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
1754
1755 // Disable strict node mutation.
1756 IsStrictFPEnabled = true;
1757 EnableExtLdPromotion = true;
1758
1759 // Let the subtarget decide if a predictable select is more expensive than the
1760 // corresponding branch. This information is used in CGP/SelectOpt to decide
1761 // when to convert selects into branches.
1762 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1763
1764 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1765 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1766
1767 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1768 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1769 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1770
1772 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1773 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1774
1775 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1776 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1777}
1778
1780 LLVMContext &Context,
1781 EVT VT) const {
1782 if (!VT.isVector())
1783 return getPointerTy(DL);
1784 if (Subtarget.hasVInstructions() &&
1785 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1786 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1788}
1789
1791 return Subtarget.getXLenVT();
1792}
1793
1794// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1795bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1796 unsigned VF,
1797 bool IsScalable) const {
1798 if (!Subtarget.hasVInstructions())
1799 return true;
1800
1801 if (!IsScalable)
1802 return true;
1803
1804 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1805 return true;
1806
1807 // Don't allow VF=1 if those types aren't legal.
1808 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1809 return true;
1810
1811 // VLEN=32 support is incomplete.
1812 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1813 return true;
1814
1815 // The maximum VF is for the smallest element width with LMUL=8.
1816 // VF must be a power of 2.
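// With RVVBytesPerBlock == 8 this gives MaxVF == 64: an LMUL=8 group of i8
// elements at the minimum assumed VLEN of 64 bits.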
1817 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1818 return VF > MaxVF || !isPowerOf2_32(VF);
1819}
1820
1822 return !Subtarget.hasVInstructions() ||
1823 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1824}
1825
1827 const CallInst &I,
1828 MachineFunction &MF,
1829 unsigned Intrinsic) const {
1830 auto &DL = I.getDataLayout();
1831
1832 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1833 bool IsUnitStrided, bool UsePtrVal = false) {
1834 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1835 // We can't use ptrVal if the intrinsic can access memory before the
1836 // pointer. This means we can't use it for strided or indexed intrinsics.
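// For example, a strided access with a negative stride reads addresses
// below the pointer operand, so ptrVal would describe the wrong memory
// range; in that case only the address space is recorded.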
1837 if (UsePtrVal)
1838 Info.ptrVal = I.getArgOperand(PtrOp);
1839 else
1840 Info.fallbackAddressSpace =
1841 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1842 Type *MemTy;
1843 if (IsStore) {
1844 // Store value is the first operand.
1845 MemTy = I.getArgOperand(0)->getType();
1846 } else {
1847 // Use the return type. If it's a segment load, the return type is a struct.
1848 MemTy = I.getType();
1849 if (MemTy->isStructTy())
1850 MemTy = MemTy->getStructElementType(0);
1851 }
1852 if (!IsUnitStrided)
1853 MemTy = MemTy->getScalarType();
1854
1855 Info.memVT = getValueType(DL, MemTy);
1856 if (MemTy->isTargetExtTy()) {
1858 // A RISC-V vector tuple type should take its alignment from its element type.
1858 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1859 MemTy = Type::getIntNTy(
1860 MemTy->getContext(),
1861 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1862 ->getZExtValue());
1863 Info.align = DL.getABITypeAlign(MemTy);
1864 } else {
1865 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
1866 }
1867 Info.size = MemoryLocation::UnknownSize;
1868 Info.flags |=
1870 return true;
1871 };
1872
1873 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1875
1877 switch (Intrinsic) {
1878 default:
1879 return false;
1880 case Intrinsic::riscv_masked_atomicrmw_xchg:
1881 case Intrinsic::riscv_masked_atomicrmw_add:
1882 case Intrinsic::riscv_masked_atomicrmw_sub:
1883 case Intrinsic::riscv_masked_atomicrmw_nand:
1884 case Intrinsic::riscv_masked_atomicrmw_max:
1885 case Intrinsic::riscv_masked_atomicrmw_min:
1886 case Intrinsic::riscv_masked_atomicrmw_umax:
1887 case Intrinsic::riscv_masked_atomicrmw_umin:
1888 case Intrinsic::riscv_masked_cmpxchg:
1889 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
1890 // narrow atomic operation. These will be expanded to an LR/SC loop that
1891 // reads/writes to/from an aligned 4-byte location. And, or, shift, etc.
1892 // will be used to modify the appropriate part of the 4-byte data and
1893 // preserve the rest.
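// A simplified sketch of the expanded loop (register names illustrative):
//   loop:
//     lr.w   t0, (a0)        # load the containing aligned 32-bit word
//     ...                    # and/or/shift the narrow value into t0
//     sc.w   t1, t0, (a0)    # attempt to store the updated word
//     bnez   t1, loop        # retry if the reservation was lost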
1894 Info.opc = ISD::INTRINSIC_W_CHAIN;
1895 Info.memVT = MVT::i32;
1896 Info.ptrVal = I.getArgOperand(0);
1897 Info.offset = 0;
1898 Info.align = Align(4);
1901 return true;
1902 case Intrinsic::riscv_seg2_load_mask:
1903 case Intrinsic::riscv_seg3_load_mask:
1904 case Intrinsic::riscv_seg4_load_mask:
1905 case Intrinsic::riscv_seg5_load_mask:
1906 case Intrinsic::riscv_seg6_load_mask:
1907 case Intrinsic::riscv_seg7_load_mask:
1908 case Intrinsic::riscv_seg8_load_mask:
1909 case Intrinsic::riscv_sseg2_load_mask:
1910 case Intrinsic::riscv_sseg3_load_mask:
1911 case Intrinsic::riscv_sseg4_load_mask:
1912 case Intrinsic::riscv_sseg5_load_mask:
1913 case Intrinsic::riscv_sseg6_load_mask:
1914 case Intrinsic::riscv_sseg7_load_mask:
1915 case Intrinsic::riscv_sseg8_load_mask:
1916 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1917 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1918 case Intrinsic::riscv_seg2_store_mask:
1919 case Intrinsic::riscv_seg3_store_mask:
1920 case Intrinsic::riscv_seg4_store_mask:
1921 case Intrinsic::riscv_seg5_store_mask:
1922 case Intrinsic::riscv_seg6_store_mask:
1923 case Intrinsic::riscv_seg7_store_mask:
1924 case Intrinsic::riscv_seg8_store_mask:
1925 // Operands are (vec, ..., vec, ptr, mask, vl)
1926 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1927 /*IsStore*/ true,
1928 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
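// Worked example (illustrative): a riscv.seg2.store.mask call has the shape
//   call void @llvm.riscv.seg2.store.mask.<types>(<vec> %v0, <vec> %v1,
//                                                 ptr %p, <mask> %m, iXLen %vl)
// so arg_size() is 5 and the pointer sits at index arg_size() - 3 == 2; the
// "- 3" keeps the computation independent of the segment count.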
1929 case Intrinsic::riscv_sseg2_store_mask:
1930 case Intrinsic::riscv_sseg3_store_mask:
1931 case Intrinsic::riscv_sseg4_store_mask:
1932 case Intrinsic::riscv_sseg5_store_mask:
1933 case Intrinsic::riscv_sseg6_store_mask:
1934 case Intrinsic::riscv_sseg7_store_mask:
1935 case Intrinsic::riscv_sseg8_store_mask:
1936 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
1937 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1938 /*IsStore*/ true,
1939 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1940 case Intrinsic::riscv_vlm:
1941 return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
1942 /*IsStore*/ false,
1943 /*IsUnitStrided*/ true,
1944 /*UsePtrVal*/ true);
1945 case Intrinsic::riscv_vle:
1946 case Intrinsic::riscv_vle_mask:
1947 case Intrinsic::riscv_vleff:
1948 case Intrinsic::riscv_vleff_mask:
1949 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1950 /*IsStore*/ false,
1951 /*IsUnitStrided*/ true,
1952 /*UsePtrVal*/ true);
1953 case Intrinsic::riscv_vsm:
1954 case Intrinsic::riscv_vse:
1955 case Intrinsic::riscv_vse_mask:
1956 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1957 /*IsStore*/ true,
1958 /*IsUnitStrided*/ true,
1959 /*UsePtrVal*/ true);
1960 case Intrinsic::riscv_vlse:
1961 case Intrinsic::riscv_vlse_mask:
1962 case Intrinsic::riscv_vloxei:
1963 case Intrinsic::riscv_vloxei_mask:
1964 case Intrinsic::riscv_vluxei:
1965 case Intrinsic::riscv_vluxei_mask:
1966 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1967 /*IsStore*/ false,
1968 /*IsUnitStrided*/ false);
1969 case Intrinsic::riscv_vsse:
1970 case Intrinsic::riscv_vsse_mask:
1971 case Intrinsic::riscv_vsoxei:
1972 case Intrinsic::riscv_vsoxei_mask:
1973 case Intrinsic::riscv_vsuxei:
1974 case Intrinsic::riscv_vsuxei_mask:
1975 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1976 /*IsStore*/ true,
1977 /*IsUnitStrided*/ false);
1978 case Intrinsic::riscv_vlseg2:
1979 case Intrinsic::riscv_vlseg3:
1980 case Intrinsic::riscv_vlseg4:
1981 case Intrinsic::riscv_vlseg5:
1982 case Intrinsic::riscv_vlseg6:
1983 case Intrinsic::riscv_vlseg7:
1984 case Intrinsic::riscv_vlseg8:
1985 case Intrinsic::riscv_vlseg2ff:
1986 case Intrinsic::riscv_vlseg3ff:
1987 case Intrinsic::riscv_vlseg4ff:
1988 case Intrinsic::riscv_vlseg5ff:
1989 case Intrinsic::riscv_vlseg6ff:
1990 case Intrinsic::riscv_vlseg7ff:
1991 case Intrinsic::riscv_vlseg8ff:
1992 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1993 /*IsStore*/ false,
1994 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1995 case Intrinsic::riscv_vlseg2_mask:
1996 case Intrinsic::riscv_vlseg3_mask:
1997 case Intrinsic::riscv_vlseg4_mask:
1998 case Intrinsic::riscv_vlseg5_mask:
1999 case Intrinsic::riscv_vlseg6_mask:
2000 case Intrinsic::riscv_vlseg7_mask:
2001 case Intrinsic::riscv_vlseg8_mask:
2002 case Intrinsic::riscv_vlseg2ff_mask:
2003 case Intrinsic::riscv_vlseg3ff_mask:
2004 case Intrinsic::riscv_vlseg4ff_mask:
2005 case Intrinsic::riscv_vlseg5ff_mask:
2006 case Intrinsic::riscv_vlseg6ff_mask:
2007 case Intrinsic::riscv_vlseg7ff_mask:
2008 case Intrinsic::riscv_vlseg8ff_mask:
2009 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2010 /*IsStore*/ false,
2011 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2012 case Intrinsic::riscv_vlsseg2:
2013 case Intrinsic::riscv_vlsseg3:
2014 case Intrinsic::riscv_vlsseg4:
2015 case Intrinsic::riscv_vlsseg5:
2016 case Intrinsic::riscv_vlsseg6:
2017 case Intrinsic::riscv_vlsseg7:
2018 case Intrinsic::riscv_vlsseg8:
2019 case Intrinsic::riscv_vloxseg2:
2020 case Intrinsic::riscv_vloxseg3:
2021 case Intrinsic::riscv_vloxseg4:
2022 case Intrinsic::riscv_vloxseg5:
2023 case Intrinsic::riscv_vloxseg6:
2024 case Intrinsic::riscv_vloxseg7:
2025 case Intrinsic::riscv_vloxseg8:
2026 case Intrinsic::riscv_vluxseg2:
2027 case Intrinsic::riscv_vluxseg3:
2028 case Intrinsic::riscv_vluxseg4:
2029 case Intrinsic::riscv_vluxseg5:
2030 case Intrinsic::riscv_vluxseg6:
2031 case Intrinsic::riscv_vluxseg7:
2032 case Intrinsic::riscv_vluxseg8:
2033 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2034 /*IsStore*/ false,
2035 /*IsUnitStrided*/ false);
2036 case Intrinsic::riscv_vlsseg2_mask:
2037 case Intrinsic::riscv_vlsseg3_mask:
2038 case Intrinsic::riscv_vlsseg4_mask:
2039 case Intrinsic::riscv_vlsseg5_mask:
2040 case Intrinsic::riscv_vlsseg6_mask:
2041 case Intrinsic::riscv_vlsseg7_mask:
2042 case Intrinsic::riscv_vlsseg8_mask:
2043 case Intrinsic::riscv_vloxseg2_mask:
2044 case Intrinsic::riscv_vloxseg3_mask:
2045 case Intrinsic::riscv_vloxseg4_mask:
2046 case Intrinsic::riscv_vloxseg5_mask:
2047 case Intrinsic::riscv_vloxseg6_mask:
2048 case Intrinsic::riscv_vloxseg7_mask:
2049 case Intrinsic::riscv_vloxseg8_mask:
2050 case Intrinsic::riscv_vluxseg2_mask:
2051 case Intrinsic::riscv_vluxseg3_mask:
2052 case Intrinsic::riscv_vluxseg4_mask:
2053 case Intrinsic::riscv_vluxseg5_mask:
2054 case Intrinsic::riscv_vluxseg6_mask:
2055 case Intrinsic::riscv_vluxseg7_mask:
2056 case Intrinsic::riscv_vluxseg8_mask:
2057 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
2058 /*IsStore*/ false,
2059 /*IsUnitStrided*/ false);
2060 case Intrinsic::riscv_vsseg2:
2061 case Intrinsic::riscv_vsseg3:
2062 case Intrinsic::riscv_vsseg4:
2063 case Intrinsic::riscv_vsseg5:
2064 case Intrinsic::riscv_vsseg6:
2065 case Intrinsic::riscv_vsseg7:
2066 case Intrinsic::riscv_vsseg8:
2067 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
2068 /*IsStore*/ true,
2069 /*IsUnitStrided*/ false);
2070 case Intrinsic::riscv_vsseg2_mask:
2071 case Intrinsic::riscv_vsseg3_mask:
2072 case Intrinsic::riscv_vsseg4_mask:
2073 case Intrinsic::riscv_vsseg5_mask:
2074 case Intrinsic::riscv_vsseg6_mask:
2075 case Intrinsic::riscv_vsseg7_mask:
2076 case Intrinsic::riscv_vsseg8_mask:
2077 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2078 /*IsStore*/ true,
2079 /*IsUnitStrided*/ false);
2080 case Intrinsic::riscv_vssseg2:
2081 case Intrinsic::riscv_vssseg3:
2082 case Intrinsic::riscv_vssseg4:
2083 case Intrinsic::riscv_vssseg5:
2084 case Intrinsic::riscv_vssseg6:
2085 case Intrinsic::riscv_vssseg7:
2086 case Intrinsic::riscv_vssseg8:
2087 case Intrinsic::riscv_vsoxseg2:
2088 case Intrinsic::riscv_vsoxseg3:
2089 case Intrinsic::riscv_vsoxseg4:
2090 case Intrinsic::riscv_vsoxseg5:
2091 case Intrinsic::riscv_vsoxseg6:
2092 case Intrinsic::riscv_vsoxseg7:
2093 case Intrinsic::riscv_vsoxseg8:
2094 case Intrinsic::riscv_vsuxseg2:
2095 case Intrinsic::riscv_vsuxseg3:
2096 case Intrinsic::riscv_vsuxseg4:
2097 case Intrinsic::riscv_vsuxseg5:
2098 case Intrinsic::riscv_vsuxseg6:
2099 case Intrinsic::riscv_vsuxseg7:
2100 case Intrinsic::riscv_vsuxseg8:
2101 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2102 /*IsStore*/ true,
2103 /*IsUnitStrided*/ false);
2104 case Intrinsic::riscv_vssseg2_mask:
2105 case Intrinsic::riscv_vssseg3_mask:
2106 case Intrinsic::riscv_vssseg4_mask:
2107 case Intrinsic::riscv_vssseg5_mask:
2108 case Intrinsic::riscv_vssseg6_mask:
2109 case Intrinsic::riscv_vssseg7_mask:
2110 case Intrinsic::riscv_vssseg8_mask:
2111 case Intrinsic::riscv_vsoxseg2_mask:
2112 case Intrinsic::riscv_vsoxseg3_mask:
2113 case Intrinsic::riscv_vsoxseg4_mask:
2114 case Intrinsic::riscv_vsoxseg5_mask:
2115 case Intrinsic::riscv_vsoxseg6_mask:
2116 case Intrinsic::riscv_vsoxseg7_mask:
2117 case Intrinsic::riscv_vsoxseg8_mask:
2118 case Intrinsic::riscv_vsuxseg2_mask:
2119 case Intrinsic::riscv_vsuxseg3_mask:
2120 case Intrinsic::riscv_vsuxseg4_mask:
2121 case Intrinsic::riscv_vsuxseg5_mask:
2122 case Intrinsic::riscv_vsuxseg6_mask:
2123 case Intrinsic::riscv_vsuxseg7_mask:
2124 case Intrinsic::riscv_vsuxseg8_mask:
2125 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2126 /*IsStore*/ true,
2127 /*IsUnitStrided*/ false);
2128 }
2129}
2130
2131bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
2132 const AddrMode &AM, Type *Ty,
2133 unsigned AS,
2134 Instruction *I) const {
2135 // No global is ever allowed as a base.
2136 if (AM.BaseGV)
2137 return false;
2138
2139 // None of our addressing modes allows a scalable offset
2140 if (AM.ScalableOffset)
2141 return false;
2142
2143 // RVV instructions only support register addressing.
2144 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2145 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2146
2147 // Require a 12-bit signed offset.
2148 if (!isInt<12>(AM.BaseOffs))
2149 return false;
2150
2151 switch (AM.Scale) {
2152 case 0: // "r+i" or just "i", depending on HasBaseReg.
2153 break;
2154 case 1:
2155 if (!AM.HasBaseReg) // allow "r+i".
2156 break;
2157 return false; // disallow "r+r" or "r+r+i".
2158 default:
2159 return false;
2160 }
2161
2162 return true;
2163}
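// Illustrative consequences of the rules above:
//   reg + 2047          -> legal   (offset fits the 12-bit signed immediate)
//   reg + 4096          -> illegal (offset does not fit in 12 bits)
//   reg + reg (Scale=1) -> illegal (no register-register addressing)
//   global as base      -> illegal (AM.BaseGV is always rejected)
//   RVV vector access   -> legal only as a plain register base, with no offset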
2164
2165bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
2166 return isInt<12>(Imm);
2167}
2168
2169bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
2170 return isInt<12>(Imm);
2171}
2172
2173// On RV32, 64-bit integers are split into their high and low parts and held
2174// in two different registers, so the trunc is free since the low register can
2175// just be used.
2176// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2177// isTruncateFree?
2178bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
2179 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2180 return false;
2181 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2182 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2183 return (SrcBits == 64 && DestBits == 32);
2184}
2185
2186bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
2187 // We consider i64->i32 free on RV64 since we have good selection of W
2188 // instructions that make promoting operations back to i64 free in many cases.
2189 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2190 !DstVT.isInteger())
2191 return false;
2192 unsigned SrcBits = SrcVT.getSizeInBits();
2193 unsigned DestBits = DstVT.getSizeInBits();
2194 return (SrcBits == 64 && DestBits == 32);
2195}
2196
2197bool RISCVTargetLowering::isTruncateFree(SDValue Val, EVT VT2) const {
2198 EVT SrcVT = Val.getValueType();
2199 // Free truncate from vnsrl and vnsra.
2200 if (Subtarget.hasVInstructions() &&
2201 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2202 SrcVT.isVector() && VT2.isVector()) {
2203 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2204 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2205 if (SrcBits == DestBits * 2) {
2206 return true;
2207 }
2208 }
2209 return TargetLowering::isTruncateFree(Val, VT2);
2210}
2211
2212bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
2213 // Zexts are free if they can be combined with a load.
2214 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2215 // poorly with type legalization of compares preferring sext.
2216 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2217 EVT MemVT = LD->getMemoryVT();
2218 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2219 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2220 LD->getExtensionType() == ISD::ZEXTLOAD))
2221 return true;
2222 }
2223
2224 return TargetLowering::isZExtFree(Val, VT2);
2225}
2226
2227bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
2228 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2229}
2230
2231bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
2232 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2233}
2234
2235bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
2236 return Subtarget.hasCTZLike();
2237}
2238
2239bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
2240 return Subtarget.hasCLZLike();
2241}
2242
2243bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
2244 const Instruction &AndI) const {
2245 // We expect to be able to match a bit extraction instruction if the Zbs
2246 // extension is supported and the mask is a power of two. However, we
2247 // conservatively return false if the mask would fit in an ANDI instruction,
2248 // on the basis that it's possible the sinking+duplication of the AND in
2249 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2250 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2251 if (!Subtarget.hasBEXTILike())
2252 return false;
2253 auto *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2254 if (!Mask)
2255 return false;
2256 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2257}
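// Illustrative example (assuming Zbs): for `(X & 0x400) == 0` the mask fits a
// 12-bit immediate, so ANDI+BNEZ is already two instructions and we return
// false; for `(X & 0x10000) == 0` the mask does not fit ANDI, so forming
// BEXTI+BNEZ is profitable and we return true.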
2258
2259bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
2260 EVT VT = Y.getValueType();
2261
2262 if (VT.isVector())
2263 return false;
2264
2265 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2266 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2267}
2268
2269bool RISCVTargetLowering::hasAndNot(SDValue Y) const {
2270 EVT VT = Y.getValueType();
2271
2272 if (!VT.isVector())
2273 return hasAndNotCompare(Y);
2274
2275 return Subtarget.hasStdExtZvkb();
2276}
2277
2278bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2279 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2280 if (Subtarget.hasStdExtZbs())
2281 return X.getValueType().isScalarInteger();
2282 auto *C = dyn_cast<ConstantSDNode>(Y);
2283 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2284 if (Subtarget.hasVendorXTHeadBs())
2285 return C != nullptr;
2286 // We can use ANDI+SEQZ/SNEZ as a bit test if Y, the bit position, is at most 10, so that the mask (1 << Y) fits ANDI's 12-bit signed immediate.
2287 return C && C->getAPIntValue().ule(10);
2288}
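// Illustrative lowerings of the bit tests accepted above (assuming Zbs):
//   (X >> 20) & 1 == 0   ->   bexti a0, a0, 20 ; seqz a0, a0
// Without Zbs, a low bit (position <= 10) still fits ANDI's immediate:
//   (X >> 3) & 1 != 0    ->   andi  a0, a0, 8  ; snez a0, a0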
2289
2290bool RISCVTargetLowering::shouldFoldSelectWithIdentityConstant(
2291 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2292 SDValue Y) const {
2293 if (SelectOpcode != ISD::VSELECT)
2294 return false;
2295
2296 // Only enable for rvv.
2297 if (!VT.isVector() || !Subtarget.hasVInstructions())
2298 return false;
2299
2300 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2301 return false;
2302
2303 return true;
2304}
2305
2306bool RISCVTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
2307 Type *Ty) const {
2308 assert(Ty->isIntegerTy());
2309
2310 unsigned BitSize = Ty->getIntegerBitWidth();
2311 if (BitSize > Subtarget.getXLen())
2312 return false;
2313
2314 // Fast path, assume 32-bit immediates are cheap.
2315 int64_t Val = Imm.getSExtValue();
2316 if (isInt<32>(Val))
2317 return true;
2318
2319 // A constant pool entry may be more aligned than the load we're trying to
2320 // replace. If we don't support unaligned scalar mem, prefer the constant
2321 // pool.
2322 // TODO: Can the caller pass down the alignment?
2323 if (!Subtarget.enableUnalignedScalarMem())
2324 return true;
2325
2326 // Prefer to keep the load if it would require many instructions.
2327 // This uses the same threshold we use for constant pools but doesn't
2328 // check useConstantPoolForLargeInts.
2329 // TODO: Should we keep the load only when we're definitely going to emit a
2330 // constant pool?
2331
2332 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
2333 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2334}
2335
2339 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2340 SelectionDAG &DAG) const {
2341 // One interesting pattern that we'd want to form is 'bit extract':
2342 // ((1 >> Y) & 1) ==/!= 0
2343 // But we also need to be careful not to try to reverse that fold.
2344
2345 // Is this '((1 >> Y) & 1)'?
2346 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2347 return false; // Keep the 'bit extract' pattern.
2348
2349 // Will this be '((1 >> Y) & 1)' after the transform?
2350 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2351 return true; // Do form the 'bit extract' pattern.
2352
2353 // If 'X' is a constant, and we transform, then we will immediately
2354 // try to undo the fold, thus causing endless combine loop.
2355 // So only do the transform if X is not a constant. This matches the default
2356 // implementation of this function.
2357 return !XC;
2358}
2359
2360bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
2361 unsigned Opc = VecOp.getOpcode();
2362
2363 // Assume target opcodes can't be scalarized.
2364 // TODO - do we have any exceptions?
2365 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2366 return false;
2367
2368 // If the vector op is not supported, try to convert to scalar.
2369 EVT VecVT = VecOp.getValueType();
2370 if (!isOperationLegalOrCustom(Opc, VecVT))
2371 return true;
2372
2373 // If the vector op is supported, but the scalar op is not, the transform may
2374 // not be worthwhile.
2375 // Still permit the transform if the vector binary operation can be converted
2376 // to a scalar binary operation that is custom lowered with an illegal type.
2377 EVT ScalarVT = VecVT.getScalarType();
2378 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2379 isOperationCustom(Opc, ScalarVT);
2380}
2381
2382bool RISCVTargetLowering::isOffsetFoldingLegal(
2383 const GlobalAddressSDNode *GA) const {
2384 // In order to maximise the opportunity for common subexpression elimination,
2385 // keep a separate ADD node for the global address offset instead of folding
2386 // it in the global address node. Later peephole optimisations may choose to
2387 // fold it back in when profitable.
2388 return false;
2389}
2390
2391// Returns 0-31 if the fli instruction is available for the type and this is a
2392// legal FP immediate for the type. Returns -1 otherwise.
2393int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
2394 if (!Subtarget.hasStdExtZfa())
2395 return -1;
2396
2397 bool IsSupportedVT = false;
2398 if (VT == MVT::f16) {
2399 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2400 } else if (VT == MVT::f32) {
2401 IsSupportedVT = true;
2402 } else if (VT == MVT::f64) {
2403 assert(Subtarget.hasStdExtD() && "Expect D extension");
2404 IsSupportedVT = true;
2405 }
2406
2407 if (!IsSupportedVT)
2408 return -1;
2409
2410 return RISCVLoadFPImm::getLoadFPImm(Imm);
2411}
2412
2413bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2414 bool ForCodeSize) const {
2415 bool IsLegalVT = false;
2416 if (VT == MVT::f16)
2417 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2418 else if (VT == MVT::f32)
2419 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2420 else if (VT == MVT::f64)
2421 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2422 else if (VT == MVT::bf16)
2423 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2424
2425 if (!IsLegalVT)
2426 return false;
2427
2428 if (getLegalZfaFPImm(Imm, VT) >= 0)
2429 return true;
2430
2431 // Some constants can be produced by fli+fneg.
2432 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2433 return true;
2434
2435 // Cannot create a 64-bit floating-point immediate value for RV32.
2436 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2437 // td can handle +0.0 or -0.0 already.
2438 // -0.0 can be created by fmv + fneg.
2439 return Imm.isZero();
2440 }
2441
2442 // Special case: fmv + fneg
2443 if (Imm.isNegZero())
2444 return true;
2445
2446 // Building an integer and then converting requires a fmv at the end of
2447 // the integer sequence. The fmv is not required for Zfinx.
2448 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2449 const int Cost =
2450 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2451 Subtarget.getXLen(), Subtarget);
2452 return Cost <= FPImmCost;
2453}
2454
2455// TODO: This is very conservative.
2456bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2457 unsigned Index) const {
2458 if (!Subtarget.hasVInstructions())
2459 return false;
2460
2461 // Extracts from index 0 are just subreg extracts.
2462 if (Index == 0)
2463 return true;
2464
2465 // Only support extracting a fixed from a fixed vector for now.
2466 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2467 return false;
2468
2469 EVT EltVT = ResVT.getVectorElementType();
2470 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2471
2472 // The smallest type we can slide is i8.
2473 // TODO: We can extract index 0 from a mask vector without a slide.
2474 if (EltVT == MVT::i1)
2475 return false;
2476
2477 unsigned ResElts = ResVT.getVectorNumElements();
2478 unsigned SrcElts = SrcVT.getVectorNumElements();
2479
2480 unsigned MinVLen = Subtarget.getRealMinVLen();
2481 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2482
2483 // If we're extracting only data from the first VLEN bits of the source
2484 // then we can always do this with an m1 vslidedown.vx. Restricting the
2485 // Index ensures we can use a vslidedown.vi.
2486 // TODO: We can generalize this when the exact VLEN is known.
2487 if (Index + ResElts <= MinVLMAX && Index < 31)
2488 return true;
2489
2490 // Conservatively only handle extracting half of a vector.
2491 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2492 // the upper half of a vector until we have more test coverage.
2493 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2494 // a cheap extract. However, this case is important in practice for
2495 // shuffled extracts of longer vectors. How should we resolve this?
2496 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2497}
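// Worked example (assuming a minimum VLEN of 128, i.e. MinVLMAX == 4 for i32
// elements): extracting v2i32 from v8i32 at index 2 satisfies
// Index + ResElts <= MinVLMAX, so it is a cheap m1 vslidedown.vi; extracting
// the upper half v4i32 at index 4 is accepted by the final half-vector check;
// extracting v4i32 at index 3 is rejected.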
2498
2499MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2500 CallingConv::ID CC,
2501 EVT VT) const {
2502 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2503 // We might still end up using a GPR but that will be decided based on ABI.
2504 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2505 !Subtarget.hasStdExtZfhminOrZhinxmin())
2506 return MVT::f32;
2507
2508 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2509
2510 return PartVT;
2511}
2512
2513unsigned
2514RISCVTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT,
2515 std::optional<MVT> RegisterVT) const {
2516 // Pair inline assembly operand
2517 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2518 *RegisterVT == MVT::Untyped)
2519 return 1;
2520
2521 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2522}
2523
2524unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2525 CallingConv::ID CC,
2526 EVT VT) const {
2527 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2528 // We might still end up using a GPR but that will be decided based on ABI.
2529 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2530 !Subtarget.hasStdExtZfhminOrZhinxmin())
2531 return 1;
2532
2533 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2534}
2535
2536unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2537 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2538 unsigned &NumIntermediates, MVT &RegisterVT) const {
2539 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2540 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2541
2542 return NumRegs;
2543}
2544
2545// Changes the condition code and swaps operands if necessary, so the SetCC
2546// operation matches one of the comparisons supported directly by branches
2547// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2548// with 1/-1.
2549static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2550 ISD::CondCode &CC, SelectionDAG &DAG,
2551 const RISCVSubtarget &Subtarget) {
2552 // If this is a single bit test that can't be handled by ANDI, shift the
2553 // bit to be tested to the MSB and perform a signed compare with 0.
2554 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2555 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2556 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2557 // XAndesPerf supports branch on test bit.
2558 !Subtarget.hasVendorXAndesPerf()) {
2559 uint64_t Mask = LHS.getConstantOperandVal(1);
2560 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2561 unsigned ShAmt = 0;
2562 if (isPowerOf2_64(Mask)) {
2563 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2564 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2565 } else {
2566 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2567 }
2568
2569 LHS = LHS.getOperand(0);
2570 if (ShAmt != 0)
2571 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2572 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2573 return;
2574 }
2575 }
2576
2577 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2578 int64_t C = RHSC->getSExtValue();
2579 switch (CC) {
2580 default: break;
2581 case ISD::SETGT:
2582 // Convert X > -1 to X >= 0.
2583 if (C == -1) {
2584 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2585 CC = ISD::SETGE;
2586 return;
2587 }
2588 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2589 C != INT64_MAX && isInt<5>(C + 1)) {
2590 // We have a conditional move instruction for SETGE but not SETGT.
2591 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit signed immediate.
2592 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2593 CC = ISD::SETGE;
2594 return;
2595 }
2596 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2597 // We have a branch immediate instruction for SETGE but not SETGT.
2598 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2599 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2600 CC = ISD::SETGE;
2601 return;
2602 }
2603 break;
2604 case ISD::SETLT:
2605 // Convert X < 1 to 0 >= X.
2606 if (C == 1) {
2607 RHS = LHS;
2608 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2609 CC = ISD::SETGE;
2610 return;
2611 }
2612 break;
2613 case ISD::SETUGT:
2614 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2615 C != INT64_MAX && isUInt<5>(C + 1)) {
2616 // We have a conditional move instruction for SETUGE but not SETUGT.
2617 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit unsigned immediate.
2618 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2619 CC = ISD::SETUGE;
2620 return;
2621 }
2622 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2623 // We have a branch immediate instruction for SETUGE but not SETUGT.
2624 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2625 // immediate.
2626 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2627 CC = ISD::SETUGE;
2628 return;
2629 }
2630 break;
2631 }
2632 }
2633
2634 switch (CC) {
2635 default:
2636 break;
2637 case ISD::SETGT:
2638 case ISD::SETLE:
2639 case ISD::SETUGT:
2640 case ISD::SETULE:
2641 CC = ISD::getSetCCSwappedOperands(CC);
2642 std::swap(LHS, RHS);
2643 break;
2644 }
2645}
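// Illustrative examples of the transforms above (assuming no XAndesPerf):
//   branch on (X & 0x80000000) == 0  ->  the mask does not fit ANDI and the
//     tested bit is already the MSB, so the compare becomes X >= 0 (SETGE 0);
//   branch on X > -1                 ->  rewritten to X >= 0;
//   SETGT with a non-constant RHS    ->  operands swapped and CC becomes SETLT,
//     which maps directly onto BLT.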
2646
2647RISCVVType::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2648 if (VT.isRISCVVectorTuple()) {
2649 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2650 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2651 return RISCVVType::LMUL_F8;
2652 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2653 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2654 return RISCVVType::LMUL_F4;
2655 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2656 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2657 return RISCVVType::LMUL_F2;
2658 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2659 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2660 return RISCVVType::LMUL_1;
2661 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2662 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2663 return RISCVVType::LMUL_2;
2664 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2665 return RISCVVType::LMUL_4;
2666 llvm_unreachable("Invalid vector tuple type LMUL.");
2667 }
2668
2669 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2670 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2671 if (VT.getVectorElementType() == MVT::i1)
2672 KnownSize *= 8;
2673
2674 switch (KnownSize) {
2675 default:
2676 llvm_unreachable("Invalid LMUL.");
2677 case 8:
2678 return RISCVVType::LMUL_F8;
2679 case 16:
2680 return RISCVVType::LMUL_F4;
2681 case 32:
2682 return RISCVVType::LMUL_F2;
2683 case 64:
2684 return RISCVVType::LMUL_1;
2685 case 128:
2686 return RISCVVType::LMUL_2;
2687 case 256:
2688 return RISCVVType::LMUL_4;
2689 case 512:
2690 return RISCVVType::LMUL_8;
2691 }
2692}
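// Worked examples for the scalable-vector path above: nxv1i32 has a known
// minimum size of 32 bits -> LMUL_F2; nxv2i64 has 128 bits -> LMUL_2; a mask
// type such as nxv8i1 has 8 bits, which is scaled by 8 to 64 -> LMUL_1.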
2693
2694unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVVType::VLMUL LMul) {
2695 switch (LMul) {
2696 default:
2697 llvm_unreachable("Invalid LMUL.");
2698 case RISCVVType::LMUL_F8:
2699 case RISCVVType::LMUL_F4:
2700 case RISCVVType::LMUL_F2:
2701 case RISCVVType::LMUL_1:
2702 return RISCV::VRRegClassID;
2703 case RISCVVType::LMUL_2:
2704 return RISCV::VRM2RegClassID;
2705 case RISCVVType::LMUL_4:
2706 return RISCV::VRM4RegClassID;
2707 case RISCVVType::LMUL_8:
2708 return RISCV::VRM8RegClassID;
2709 }
2710}
2711
2712unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2713 RISCVVType::VLMUL LMUL = getLMUL(VT);
2714 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2715 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2716 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2717 "Unexpected subreg numbering");
2718 return RISCV::sub_vrm1_0 + Index;
2719 }
2720 if (LMUL == RISCVVType::LMUL_2) {
2721 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2722 "Unexpected subreg numbering");
2723 return RISCV::sub_vrm2_0 + Index;
2724 }
2725 if (LMUL == RISCVVType::LMUL_4) {
2726 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2727 "Unexpected subreg numbering");
2728 return RISCV::sub_vrm4_0 + Index;
2729 }
2730 llvm_unreachable("Invalid vector type.");
2731}
2732
2733unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2734 if (VT.isRISCVVectorTuple()) {
2735 unsigned NF = VT.getRISCVVectorTupleNumFields();
2736 unsigned RegsPerField =
2737 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2738 (NF * RISCV::RVVBitsPerBlock));
2739 switch (RegsPerField) {
2740 case 1:
2741 if (NF == 2)
2742 return RISCV::VRN2M1RegClassID;
2743 if (NF == 3)
2744 return RISCV::VRN3M1RegClassID;
2745 if (NF == 4)
2746 return RISCV::VRN4M1RegClassID;
2747 if (NF == 5)
2748 return RISCV::VRN5M1RegClassID;
2749 if (NF == 6)
2750 return RISCV::VRN6M1RegClassID;
2751 if (NF == 7)
2752 return RISCV::VRN7M1RegClassID;
2753 if (NF == 8)
2754 return RISCV::VRN8M1RegClassID;
2755 break;
2756 case 2:
2757 if (NF == 2)
2758 return RISCV::VRN2M2RegClassID;
2759 if (NF == 3)
2760 return RISCV::VRN3M2RegClassID;
2761 if (NF == 4)
2762 return RISCV::VRN4M2RegClassID;
2763 break;
2764 case 4:
2765 assert(NF == 2);
2766 return RISCV::VRN2M4RegClassID;
2767 default:
2768 break;
2769 }
2770 llvm_unreachable("Invalid vector tuple type RegClass.");
2771 }
2772
2773 if (VT.getVectorElementType() == MVT::i1)
2774 return RISCV::VRRegClassID;
2775 return getRegClassIDForLMUL(getLMUL(VT));
2776}
2777
2778// Attempt to decompose a subvector insert/extract between VecVT and
2779// SubVecVT via subregister indices. Returns the subregister index that
2780// can perform the subvector insert/extract with the given element index, as
2781// well as the index corresponding to any leftover subvectors that must be
2782// further inserted/extracted within the register class for SubVecVT.
2783std::pair<unsigned, unsigned>
2785 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2786 const RISCVRegisterInfo *TRI) {
2787 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2788 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2789 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2790 "Register classes not ordered");
2791 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2792 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2793
2794 // If VecVT is a vector tuple type, either it's the tuple type with the same
2795 // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2796 if (VecVT.isRISCVVectorTuple()) {
2797 if (VecRegClassID == SubRegClassID)
2798 return {RISCV::NoSubRegister, 0};
2799
2800 assert(SubVecVT.isScalableVector() &&
2801 "Only allow scalable vector subvector.");
2802 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2803 "Invalid vector tuple insert/extract for vector and subvector with "
2804 "different LMUL.");
2805 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2806 }
2807
2808 // Try to compose a subregister index that takes us from the incoming
2809 // LMUL>1 register class down to the outgoing one. At each step we halve
2810 // the LMUL:
2811 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2812 // Note that this is not guaranteed to find a subregister index, such as
2813 // when we are extracting from one VR type to another.
2814 unsigned SubRegIdx = RISCV::NoSubRegister;
2815 for (const unsigned RCID :
2816 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2817 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2818 VecVT = VecVT.getHalfNumVectorElementsVT();
2819 bool IsHi =
2820 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2821 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2822 getSubregIndexByMVT(VecVT, IsHi));
2823 if (IsHi)
2824 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2825 }
2826 return {SubRegIdx, InsertExtractIdx};
2827}
2828
2829// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2830// stores for those types.
2831bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2832 return !Subtarget.useRVVForFixedLengthVectors() ||
2833 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2834}
2835
2836bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2837 if (!ScalarTy.isSimple())
2838 return false;
2839 switch (ScalarTy.getSimpleVT().SimpleTy) {
2840 case MVT::iPTR:
2841 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2842 case MVT::i8:
2843 case MVT::i16:
2844 case MVT::i32:
2845 return Subtarget.hasVInstructions();
2846 case MVT::i64:
2847 return Subtarget.hasVInstructionsI64();
2848 case MVT::f16:
2849 return Subtarget.hasVInstructionsF16Minimal();
2850 case MVT::bf16:
2851 return Subtarget.hasVInstructionsBF16Minimal();
2852 case MVT::f32:
2853 return Subtarget.hasVInstructionsF32();
2854 case MVT::f64:
2855 return Subtarget.hasVInstructionsF64();
2856 default:
2857 return false;
2858 }
2859}
2860
2861
2862unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2863 return NumRepeatedDivisors;
2864}
2865
2866static SDValue getVLOperand(SDValue Op) {
2867 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2868 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2869 "Unexpected opcode");
2870 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2871 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2872 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2873 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2874 if (!II)
2875 return SDValue();
2876 return Op.getOperand(II->VLOperand + 1 + HasChain);
2877}
2878
2879static bool useRVVForFixedLengthVectorVT(MVT VT,
2880 const RISCVSubtarget &Subtarget) {
2881 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2882 if (!Subtarget.useRVVForFixedLengthVectors())
2883 return false;
2884
2885 // We only support a set of vector types with a consistent maximum fixed size
2886 // across all supported vector element types to avoid legalization issues.
2887 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2888 // fixed-length vector type we support is 1024 bytes.
2889 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
2890 return false;
2891
2892 unsigned MinVLen = Subtarget.getRealMinVLen();
2893
2894 MVT EltVT = VT.getVectorElementType();
2895
2896 // Don't use RVV for vectors we cannot scalarize if required.
2897 switch (EltVT.SimpleTy) {
2898 // i1 is supported but has different rules.
2899 default:
2900 return false;
2901 case MVT::i1:
2902 // Masks can only use a single register.
2903 if (VT.getVectorNumElements() > MinVLen)
2904 return false;
2905 MinVLen /= 8;
2906 break;
2907 case MVT::i8:
2908 case MVT::i16:
2909 case MVT::i32:
2910 break;
2911 case MVT::i64:
2912 if (!Subtarget.hasVInstructionsI64())
2913 return false;
2914 break;
2915 case MVT::f16:
2916 if (!Subtarget.hasVInstructionsF16Minimal())
2917 return false;
2918 break;
2919 case MVT::bf16:
2920 if (!Subtarget.hasVInstructionsBF16Minimal())
2921 return false;
2922 break;
2923 case MVT::f32:
2924 if (!Subtarget.hasVInstructionsF32())
2925 return false;
2926 break;
2927 case MVT::f64:
2928 if (!Subtarget.hasVInstructionsF64())
2929 return false;
2930 break;
2931 }
2932
2933 // Reject elements larger than ELEN.
2934 if (EltVT.getSizeInBits() > Subtarget.getELen())
2935 return false;
2936
2937 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2938 // Don't use RVV for types that don't fit.
2939 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2940 return false;
2941
2942 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2943 // the base fixed length RVV support in place.
2944 if (!VT.isPow2VectorType())
2945 return false;
2946
2947 return true;
2948}
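// Worked example (assuming a minimum VLEN of 128 and the default LMUL limit
// of 8): v16i32 is 512 bits, so LMul = ceil(512 / 128) = 4 and the type is a
// power of two, so RVV is used. v64i64 would need LMul = ceil(4096 / 128) = 32,
// which exceeds the limit, so it is rejected.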
2949
2950bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2951 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2952}
2953
2954// Return the largest legal scalable vector type that matches VT's element type.
2955static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2956 const RISCVSubtarget &Subtarget) {
2957 // This may be called before legal types are setup.
2958 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2959 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2960 "Expected legal fixed length vector!");
2961
2962 unsigned MinVLen = Subtarget.getRealMinVLen();
2963 unsigned MaxELen = Subtarget.getELen();
2964
2965 MVT EltVT = VT.getVectorElementType();
2966 switch (EltVT.SimpleTy) {
2967 default:
2968 llvm_unreachable("unexpected element type for RVV container");
2969 case MVT::i1:
2970 case MVT::i8:
2971 case MVT::i16:
2972 case MVT::i32:
2973 case MVT::i64:
2974 case MVT::bf16:
2975 case MVT::f16:
2976 case MVT::f32:
2977 case MVT::f64: {
2978 // We prefer to use LMUL=1 for VLEN sized types. Use fractional LMULs for
2979 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2980 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2981 unsigned NumElts =
2982 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2983 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2984 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2985 return MVT::getScalableVectorVT(EltVT, NumElts);
2986 }
2987 }
2988}
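// Worked example (assuming a minimum VLEN of 128 and ELEN of 64): v8i32 gives
// NumElts = 8 * 64 / 128 = 4, i.e. the container nxv4i32 (LMUL = 1), while
// v2i16 gives NumElts = max(2 * 64 / 128, 64 / 64) = 1, i.e. nxv1i16
// (a fractional LMUL of 1/4).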
2989
2990static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2991 const RISCVSubtarget &Subtarget) {
2992 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2993 Subtarget);
2994}
2995
2996MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2997 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2998}
2999
3000// Grow V to consume an entire RVV register.
3001static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
3002 const RISCVSubtarget &Subtarget) {
3003 assert(VT.isScalableVector() &&
3004 "Expected to convert into a scalable vector!");
3005 assert(V.getValueType().isFixedLengthVector() &&
3006 "Expected a fixed length vector operand!");
3007 SDLoc DL(V);
3008 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
3009}
3010
3011// Shrink V so it's just big enough to maintain a VT's worth of data.
3012static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
3013 const RISCVSubtarget &Subtarget) {
3014 assert(VT.isFixedLengthVector() &&
3015 "Expected to convert into a fixed length vector!");
3016 assert(V.getValueType().isScalableVector() &&
3017 "Expected a scalable vector operand!");
3018 SDLoc DL(V);
3019 return DAG.getExtractSubvector(DL, VT, V, 0);
3020}
3021
3022/// Return the mask type suitable for masking the provided vector type.
3023/// This is simply an i1 element type vector of the same
3024/// (possibly scalable) length.
3025static MVT getMaskTypeFor(MVT VecVT) {
3026 assert(VecVT.isVector());
3027 ElementCount EC = VecVT.getVectorElementCount();
3028 return MVT::getVectorVT(MVT::i1, EC);
3029}
3030
3031/// Creates an all-ones mask suitable for masking a vector of type VecTy with
3032/// vector length VL.
3033static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
3034 SelectionDAG &DAG) {
3035 MVT MaskVT = getMaskTypeFor(VecVT);
3036 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3037}
3038
3039static std::pair<SDValue, SDValue>
3040getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
3041 const RISCVSubtarget &Subtarget) {
3042 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
3043 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
3044 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
3045 return {Mask, VL};
3046}
3047
3048static std::pair<SDValue, SDValue>
3049getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
3050 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
3051 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
3052 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
3053 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
3054 return {Mask, VL};
3055}
3056
3057// Gets the two common "VL" operands: an all-ones mask and the vector length.
3058// VecVT is a vector type, either fixed-length or scalable. If VecVT is
3059// fixed-length, ContainerVT is the scalable vector type it is contained in;
3060// otherwise, if VecVT is scalable, ContainerVT should be the same as VecVT.
3061static std::pair<SDValue, SDValue>
3062getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
3063 const RISCVSubtarget &Subtarget) {
3064 if (VecVT.isFixedLengthVector())
3065 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
3066 Subtarget);
3067 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
3068 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
3069}
3070
3071SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
3072 SelectionDAG &DAG) const {
3073 assert(VecVT.isScalableVector() && "Expected scalable vector");
3074 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
3075 VecVT.getVectorElementCount());
3076}
3077
3078std::pair<unsigned, unsigned>
3079RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
3080 const RISCVSubtarget &Subtarget) {
3081 assert(VecVT.isScalableVector() && "Expected scalable vector");
3082
3083 unsigned EltSize = VecVT.getScalarSizeInBits();
3084 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
3085
3086 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
3087 unsigned MaxVLMAX =
3088 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
3089
3090 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
3091 unsigned MinVLMAX =
3092 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
3093
3094 return std::make_pair(MinVLMAX, MaxVLMAX);
3095}
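// Worked example: with VLMAX = (VLEN / SEW) * LMUL, an nxv4i32 (SEW = 32,
// LMUL = 2) on a subtarget guaranteeing VLEN between 128 and 512 has
// MinVLMAX = (128 / 32) * 2 = 8 and MaxVLMAX = (512 / 32) * 2 = 32.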
3096
3097// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
3098// cases of either are (currently) supported. This can get us into an infinite loop
3099// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3100// as a ..., etc.
3101// Until either (or both) of these can reliably lower any node, reporting that
3102// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3103// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3104// which is not desirable.
3105bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
3106 EVT VT, unsigned DefinedValues) const {
3107 return false;
3108}
3109
3110InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
3111 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
3112 // implementation-defined.
3113 if (!VT.isVector())
3114 return InstructionCost::getInvalid();
3115 unsigned DLenFactor = Subtarget.getDLenFactor();
3116 unsigned Cost;
3117 if (VT.isScalableVector()) {
3118 unsigned LMul;
3119 bool Fractional;
3120 std::tie(LMul, Fractional) =
3121 RISCVVType::decodeVLMUL(getLMUL(VT));
3122 if (Fractional)
3123 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3124 else
3125 Cost = (LMul * DLenFactor);
3126 } else {
3127 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3128 }
3129 return Cost;
3130}
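// Worked example (assuming VLEN = 128 and DLEN = VLEN / 2, i.e. DLenFactor = 2):
// nxv8i32 is LMUL 2, so its cost is 2 * 2 = 4; nxv1i32 is a fractional LMUL of
// 1/2, which is <= DLenFactor, so its cost is 2 / 2 = 1; a fixed v4i32
// (128 bits) costs ceil(128 / 64) = 2.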
3131
3132
3133/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3134/// may be quadratic in the number of vregs implied by LMUL, and is assumed to
3135/// be so by default. VRGatherCostModel reflects the available options. Note that
3136/// the operands (index and possibly mask) are handled separately.
3137InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
3138 auto LMULCost = getLMULCost(VT);
3139 bool Log2CostModel =
3140 Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
3141 if (Log2CostModel && LMULCost.isValid()) {
3142 unsigned Log = Log2_64(LMULCost.getValue());
3143 if (Log > 0)
3144 return LMULCost * Log;
3145 }
3146 return LMULCost * LMULCost;
3147}
3148
3149/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3150/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3151/// or may track the vrgather.vv cost. It is implementation-dependent.
3155
3156/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3157/// for the type VT. (This does not cover the vslide1up or vslide1down
3158/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3159/// or may track the vrgather.vv cost. It is implementation-dependent.
3163
3164/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3165/// for the type VT. (This does not cover the vslide1up or vslide1down
3166/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3167/// or may track the vrgather.vv cost. It is implementation-dependent.
3171
3173 const RISCVSubtarget &Subtarget) {
3174 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3175 // bf16 conversions are always promoted to f32.
3176 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3177 Op.getValueType() == MVT::bf16) {
3178 bool IsStrict = Op->isStrictFPOpcode();
3179
3180 SDLoc DL(Op);
3181 if (IsStrict) {
3182 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3183 {Op.getOperand(0), Op.getOperand(1)});
3184 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3185 {Op.getValueType(), MVT::Other},
3186 {Val.getValue(1), Val.getValue(0),
3187 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3188 }
3189 return DAG.getNode(
3190 ISD::FP_ROUND, DL, Op.getValueType(),
3191 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3192 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3193 }
3194
3195 // Other operations are legal.
3196 return Op;
3197}
3198
3199static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
3200 const RISCVSubtarget &Subtarget) {
3201 // RISC-V FP-to-int conversions saturate to the destination register size, but
3202 // don't produce 0 for nan. We can use a conversion instruction and fix the
3203 // nan case with a compare and a select.
3204 SDValue Src = Op.getOperand(0);
3205
3206 MVT DstVT = Op.getSimpleValueType();
3207 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3208
3209 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3210
3211 if (!DstVT.isVector()) {
3212 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3213 // the result.
3214 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3215 Src.getValueType() == MVT::bf16) {
3216 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3217 }
3218
3219 unsigned Opc;
3220 if (SatVT == DstVT)
3221 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3222 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3223 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3224 else
3225 return SDValue();
3226 // FIXME: Support other SatVTs by clamping before or after the conversion.
3227
3228 SDLoc DL(Op);
3229 SDValue FpToInt = DAG.getNode(
3230 Opc, DL, DstVT, Src,
3232
3233 if (Opc == RISCVISD::FCVT_WU_RV64)
3234 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3235
3236 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3237 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3239 }
3240
3241 // Vectors.
3242
3243 MVT DstEltVT = DstVT.getVectorElementType();
3244 MVT SrcVT = Src.getSimpleValueType();
3245 MVT SrcEltVT = SrcVT.getVectorElementType();
3246 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3247 unsigned DstEltSize = DstEltVT.getSizeInBits();
3248
3249 // Only handle saturating to the destination type.
3250 if (SatVT != DstEltVT)
3251 return SDValue();
3252
3253 MVT DstContainerVT = DstVT;
3254 MVT SrcContainerVT = SrcVT;
3255 if (DstVT.isFixedLengthVector()) {
3256 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3257 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3258 assert(DstContainerVT.getVectorElementCount() ==
3259 SrcContainerVT.getVectorElementCount() &&
3260 "Expected same element count");
3261 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3262 }
3263
3264 SDLoc DL(Op);
3265
3266 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3267
3268 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3269 {Src, Src, DAG.getCondCode(ISD::SETNE),
3270 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3271
3272 // If we need to widen by more than one step, promote the FP type first,
3273 // then do a widening convert.
3274 if (DstEltSize > (2 * SrcEltSize)) {
3275 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3276 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3277 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3278 }
3279
3280 MVT CvtContainerVT = DstContainerVT;
3281 MVT CvtEltVT = DstEltVT;
3282 if (SrcEltSize > (2 * DstEltSize)) {
3283 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3284 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3285 }
3286
3287 unsigned RVVOpc =
3288 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3289 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3290
3291 while (CvtContainerVT != DstContainerVT) {
3292 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3293 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3294 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3295 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3296 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3297 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3298 }
3299
3300 SDValue SplatZero = DAG.getNode(
3301 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3302 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3303 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3304 Res, DAG.getUNDEF(DstContainerVT), VL);
3305
3306 if (DstVT.isFixedLengthVector())
3307 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3308
3309 return Res;
3310}
3311
3313 const RISCVSubtarget &Subtarget) {
3314 bool IsStrict = Op->isStrictFPOpcode();
3315 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3316
3317 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3318 // bf16 conversions are always promoted to f32.
3319 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3320 SrcVal.getValueType() == MVT::bf16) {
3321 SDLoc DL(Op);
3322 if (IsStrict) {
3323 SDValue Ext =
3324 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3325 {Op.getOperand(0), SrcVal});
3326 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3327 {Ext.getValue(1), Ext.getValue(0)});
3328 }
3329 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3330 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3331 }
3332
3333 // Other operations are legal.
3334 return Op;
3335}
3336
3337static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
3338 switch (Opc) {
3339 case ISD::FROUNDEVEN:
3340 case ISD::STRICT_FROUNDEVEN:
3341 case ISD::VP_FROUNDEVEN:
3342 return RISCVFPRndMode::RNE;
3343 case ISD::FTRUNC:
3344 case ISD::STRICT_FTRUNC:
3345 case ISD::VP_FROUNDTOZERO:
3346 return RISCVFPRndMode::RTZ;
3347 case ISD::FFLOOR:
3348 case ISD::STRICT_FFLOOR:
3349 case ISD::VP_FFLOOR:
3350 return RISCVFPRndMode::RDN;
3351 case ISD::FCEIL:
3352 case ISD::STRICT_FCEIL:
3353 case ISD::VP_FCEIL:
3354 return RISCVFPRndMode::RUP;
3355 case ISD::FROUND:
3356 case ISD::LROUND:
3357 case ISD::LLROUND:
3358 case ISD::STRICT_FROUND:
3359 case ISD::STRICT_LROUND:
3360 case ISD::STRICT_LLROUND:
3361 case ISD::VP_FROUND:
3362 return RISCVFPRndMode::RMM;
3363 case ISD::FRINT:
3364 case ISD::LRINT:
3365 case ISD::LLRINT:
3366 case ISD::STRICT_FRINT:
3367 case ISD::STRICT_LRINT:
3368 case ISD::STRICT_LLRINT:
3369 case ISD::VP_FRINT:
3370 case ISD::VP_LRINT:
3371 case ISD::VP_LLRINT:
3372 return RISCVFPRndMode::DYN;
3373 }
3374
3375 return RISCVFPRndMode::Invalid;
3376}
3377
3378// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
3379// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3380// the integer domain and back, taking care to avoid converting values that are
3381// NaN or already correct.
3382static SDValue
3383lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3384 const RISCVSubtarget &Subtarget) {
3385 MVT VT = Op.getSimpleValueType();
3386 assert(VT.isVector() && "Unexpected type");
3387
3388 SDLoc DL(Op);
3389
3390 SDValue Src = Op.getOperand(0);
3391
3392 // Freeze the source since we are increasing the number of uses.
3393 Src = DAG.getFreeze(Src);
3394
3395 MVT ContainerVT = VT;
3396 if (VT.isFixedLengthVector()) {
3397 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3398 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3399 }
3400
3401 SDValue Mask, VL;
3402 if (Op->isVPOpcode()) {
3403 Mask = Op.getOperand(1);
3404 if (VT.isFixedLengthVector())
3405 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3406 Subtarget);
3407 VL = Op.getOperand(2);
3408 } else {
3409 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3410 }
3411
3412 // We do the conversion on the absolute value and fix the sign at the end.
3413 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3414
3415 // Determine the largest integer that can be represented exactly. This and
3416 // values larger than it don't have any fractional bits so don't need to
3417 // be converted.
3418 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3419 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3420 APFloat MaxVal = APFloat(FltSem);
3421 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3422 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3423 SDValue MaxValNode =
3424 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3425 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3426 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3427
3428 // If abs(Src) was larger than MaxVal or nan, keep it.
3429 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3430 Mask =
3431 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3432 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3433 Mask, Mask, VL});
3434
3435 // Truncate to integer and convert back to FP.
3436 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3437 MVT XLenVT = Subtarget.getXLenVT();
3438 SDValue Truncated;
3439
3440 switch (Op.getOpcode()) {
3441 default:
3442 llvm_unreachable("Unexpected opcode");
3443 case ISD::FRINT:
3444 case ISD::VP_FRINT:
3445 case ISD::FCEIL:
3446 case ISD::VP_FCEIL:
3447 case ISD::FFLOOR:
3448 case ISD::VP_FFLOOR:
3449 case ISD::FROUND:
3450 case ISD::FROUNDEVEN:
3451 case ISD::VP_FROUND:
3452 case ISD::VP_FROUNDEVEN:
3453 case ISD::VP_FROUNDTOZERO: {
3454 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3456 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3457 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3458 break;
3459 }
3460 case ISD::FTRUNC:
3461 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3462 Mask, VL);
3463 break;
3464 case ISD::FNEARBYINT:
3465 case ISD::VP_FNEARBYINT:
3466 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3467 Mask, VL);
3468 break;
3469 }
3470
3471 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3472 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3473 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3474 Mask, VL);
3475
3476 // Restore the original sign so that -0.0 is preserved.
3477 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3478 Src, Src, Mask, VL);
3479
3480 if (!VT.isFixedLengthVector())
3481 return Truncated;
3482
3483 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3484}
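// Worked example of the MaxVal threshold used above: for f32 the precision is
// 24 bits, so MaxVal = 2^23 = 8388608.0. Any |x| >= 2^23 is already an integer
// (and NaNs fail the SETOLT compare), so only lanes with |x| < 2^23 take the
// int round-trip. For f64 the corresponding threshold is 2^52.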
3485
3486// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3487// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting any sNaN in the source
3488// to a qNaN and then converting the new source to integer and back to FP.
3489static SDValue
3490lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3491 const RISCVSubtarget &Subtarget) {
3492 SDLoc DL(Op);
3493 MVT VT = Op.getSimpleValueType();
3494 SDValue Chain = Op.getOperand(0);
3495 SDValue Src = Op.getOperand(1);
3496
3497 MVT ContainerVT = VT;
3498 if (VT.isFixedLengthVector()) {
3499 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3500 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3501 }
3502
3503 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3504
3505 // Freeze the source since we are increasing the number of uses.
3506 Src = DAG.getFreeze(Src);
3507
3508 // Convert sNaN to qNaN by executing x + x for each unordered element x in Src.
3509 MVT MaskVT = Mask.getSimpleValueType();
3510 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3511 DAG.getVTList(MaskVT, MVT::Other),
3512 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3513 DAG.getUNDEF(MaskVT), Mask, VL});
3514 Chain = Unorder.getValue(1);
3515 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3516 DAG.getVTList(ContainerVT, MVT::Other),
3517 {Chain, Src, Src, Src, Unorder, VL});
3518 Chain = Src.getValue(1);
3519
3520 // We do the conversion on the absolute value and fix the sign at the end.
3521 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3522
3523 // Determine the largest integer that can be represented exactly. This and
3524 // values larger than it don't have any fractional bits so don't need to
3525 // be converted.
3526 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3527 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3528 APFloat MaxVal = APFloat(FltSem);
3529 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3530 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3531 SDValue MaxValNode =
3532 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3533 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3534 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3535
3536 // If abs(Src) was larger than MaxVal or nan, keep it.
3537 Mask = DAG.getNode(
3538 RISCVISD::SETCC_VL, DL, MaskVT,
3539 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3540
3541 // Truncate to integer and convert back to FP.
3542 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3543 MVT XLenVT = Subtarget.getXLenVT();
3544 SDValue Truncated;
3545
3546 switch (Op.getOpcode()) {
3547 default:
3548 llvm_unreachable("Unexpected opcode");
3549 case ISD::STRICT_FCEIL:
3550 case ISD::STRICT_FFLOOR:
3551 case ISD::STRICT_FROUND:
3552 case ISD::STRICT_FROUNDEVEN: {
3553 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3555 Truncated = DAG.getNode(
3556 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3557 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3558 break;
3559 }
3560 case ISD::STRICT_FTRUNC:
3561 Truncated =
3562 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3563 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3564 break;
3565 case ISD::STRICT_FNEARBYINT:
3566 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3567 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3568 Mask, VL);
3569 break;
3570 }
3571 Chain = Truncated.getValue(1);
3572
3573 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3574 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3575 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3576 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3577 Truncated, Mask, VL);
3578 Chain = Truncated.getValue(1);
3579 }
3580
3581 // Restore the original sign so that -0.0 is preserved.
3582 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3583 Src, Src, Mask, VL);
3584
3585 if (VT.isFixedLengthVector())
3586 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3587 return DAG.getMergeValues({Truncated, Chain}, DL);
3588}
3589
3590 static SDValue
3591 lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3592 const RISCVSubtarget &Subtarget) {
3593 MVT VT = Op.getSimpleValueType();
3594 if (VT.isVector())
3595 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3596
3597 if (DAG.shouldOptForSize())
3598 return SDValue();
3599
3600 SDLoc DL(Op);
3601 SDValue Src = Op.getOperand(0);
3602
3603 // Create an integer the size of the mantissa with the MSB set. This and all
3604 // values larger than it don't have any fractional bits so don't need to be
3605 // converted.
3606 const fltSemantics &FltSem = VT.getFltSemantics();
3607 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3608 APFloat MaxVal = APFloat(FltSem);
3609 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3610 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3611 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3612
3613 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3614 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3615 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3616}
3617
3618// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3619 static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG,
3620 const RISCVSubtarget &Subtarget) {
3621 SDLoc DL(Op);
3622 MVT DstVT = Op.getSimpleValueType();
3623 SDValue Src = Op.getOperand(0);
3624 MVT SrcVT = Src.getSimpleValueType();
3625 assert(SrcVT.isVector() && DstVT.isVector() &&
3626 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3627 "Unexpected type");
3628
3629 MVT DstContainerVT = DstVT;
3630 MVT SrcContainerVT = SrcVT;
3631
3632 if (DstVT.isFixedLengthVector()) {
3633 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3634 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3635 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3636 }
3637
3638 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3639
3640 // [b]f16 -> f32
3641 MVT SrcElemType = SrcVT.getVectorElementType();
3642 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3643 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3644 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3645 }
3646
3647 SDValue Res =
3648 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3649 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3650 Subtarget.getXLenVT()),
3651 VL);
3652
3653 if (!DstVT.isFixedLengthVector())
3654 return Res;
3655
3656 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3657}
3658
3659 static SDValue
3660 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3661 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3662 SDValue Offset, SDValue Mask, SDValue VL,
3663 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3664 if (Passthru.isUndef())
3665 Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3666 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3667 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3668 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3669}
3670
3671static SDValue
3672getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3673 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3674 SDValue VL,
3675 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3676 if (Passthru.isUndef())
3677 Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
3678 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3679 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3680 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3681}
3682
3683 struct VIDSequence {
3684 int64_t StepNumerator;
3685 unsigned StepDenominator;
3686 int64_t Addend;
3687};
3688
3689 static std::optional<APInt> getExactInteger(const APFloat &APF,
3690 unsigned BitWidth) {
3691 // We will use a SINT_TO_FP to materialize this constant so we should use a
3692 // signed APSInt here.
3693 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3694 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3695 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3696 // the rounding mode changes the output value, then it is not an exact
3697 // integer.
3698 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3699 bool IsExact;
3700 // If it is out of signed integer range, it will return an invalid operation.
3701 // If it is not an exact integer, IsExact is false.
3702 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3703 APFloatBase::opInvalidOp) ||
3704 !IsExact)
3705 return std::nullopt;
3706 return ValInt.extractBits(BitWidth, 0);
3707}
3708
3709// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3710 // to the (non-zero) step S and start value X. This can then be lowered as the
3711// RVV sequence (VID * S) + X, for example.
3712// The step S is represented as an integer numerator divided by a positive
3713// denominator. Note that the implementation currently only identifies
3714// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3715// cannot detect 2/3, for example.
3716// Note that this method will also match potentially unappealing index
3717// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3718// determine whether this is worth generating code for.
3719//
3720// EltSizeInBits is the size of the type that the sequence will be calculated
3721// in, i.e. SEW for build_vectors or XLEN for address calculations.
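// A few illustrative examples of what this matcher returns:
//   <0, 2, 4, 6>       -> StepNumerator=2,  StepDenominator=1, Addend=0
//   <1, 1, 2, 2, 3, 3>  -> StepNumerator=1,  StepDenominator=2, Addend=1
//   <5, 4, 3, 2>        -> StepNumerator=-1, StepDenominator=1, Addend=5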
3722static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3723 unsigned EltSizeInBits) {
3724 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3725 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3726 return std::nullopt;
3727 bool IsInteger = Op.getValueType().isInteger();
3728
3729 std::optional<unsigned> SeqStepDenom;
3730 std::optional<APInt> SeqStepNum;
3731 std::optional<APInt> SeqAddend;
3732 std::optional<std::pair<APInt, unsigned>> PrevElt;
3733 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3734
3735 // First extract the ops into a list of constant integer values. This may not
3736 // be possible for floats if they're not all representable as integers.
3737 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3738 const unsigned OpSize = Op.getScalarValueSizeInBits();
3739 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3740 if (Elt.isUndef()) {
3741 Elts[Idx] = std::nullopt;
3742 continue;
3743 }
3744 if (IsInteger) {
3745 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3746 } else {
3747 auto ExactInteger =
3748 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3749 if (!ExactInteger)
3750 return std::nullopt;
3751 Elts[Idx] = *ExactInteger;
3752 }
3753 }
3754
3755 for (auto [Idx, Elt] : enumerate(Elts)) {
3756 // Assume undef elements match the sequence; we just have to be careful
3757 // when interpolating across them.
3758 if (!Elt)
3759 continue;
3760
3761 if (PrevElt) {
3762 // Calculate the step since the last non-undef element, and ensure
3763 // it's consistent across the entire sequence.
3764 unsigned IdxDiff = Idx - PrevElt->second;
3765 APInt ValDiff = *Elt - PrevElt->first;
3766
3767 // A zero value difference means that we're somewhere in the middle
3768 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3769 // step change before evaluating the sequence.
3770 if (ValDiff == 0)
3771 continue;
3772
3773 int64_t Remainder = ValDiff.srem(IdxDiff);
3774 // Normalize the step if it's greater than 1.
3775 if (Remainder != ValDiff.getSExtValue()) {
3776 // The difference must cleanly divide the element span.
3777 if (Remainder != 0)
3778 return std::nullopt;
3779 ValDiff = ValDiff.sdiv(IdxDiff);
3780 IdxDiff = 1;
3781 }
3782
3783 if (!SeqStepNum)
3784 SeqStepNum = ValDiff;
3785 else if (ValDiff != SeqStepNum)
3786 return std::nullopt;
3787
3788 if (!SeqStepDenom)
3789 SeqStepDenom = IdxDiff;
3790 else if (IdxDiff != *SeqStepDenom)
3791 return std::nullopt;
3792 }
3793
3794 // Record this non-undef element for later.
3795 if (!PrevElt || PrevElt->first != *Elt)
3796 PrevElt = std::make_pair(*Elt, Idx);
3797 }
3798
3799 // We need to have logged a step for this to count as a legal index sequence.
3800 if (!SeqStepNum || !SeqStepDenom)
3801 return std::nullopt;
3802
3803 // Loop back through the sequence and validate elements we might have skipped
3804 // while waiting for a valid step. While doing this, log any sequence addend.
3805 for (auto [Idx, Elt] : enumerate(Elts)) {
3806 if (!Elt)
3807 continue;
3808 APInt ExpectedVal =
3809 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3810 *SeqStepNum)
3811 .sdiv(*SeqStepDenom);
3812
3813 APInt Addend = *Elt - ExpectedVal;
3814 if (!SeqAddend)
3815 SeqAddend = Addend;
3816 else if (Addend != SeqAddend)
3817 return std::nullopt;
3818 }
3819
3820 assert(SeqAddend && "Must have an addend if we have a step");
3821
3822 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3823 SeqAddend->getSExtValue()};
3824}
3825
3826// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3827// and lower it as a VRGATHER_VX_VL from the source vector.
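// For example (illustrative): a splat of (extractelement %src, %i) becomes a
// vrgather.vx of %src with scalar index %i, avoiding a round trip of the
// element through a scalar register.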
3828static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3829 SelectionDAG &DAG,
3830 const RISCVSubtarget &Subtarget) {
3831 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3832 return SDValue();
3833 SDValue Src = SplatVal.getOperand(0);
3834 // Don't perform this optimization for i1 vectors, or if the element types are
3835 // different
3836 // FIXME: Support i1 vectors, maybe by promoting to i8?
3837 MVT EltTy = VT.getVectorElementType();
3838 if (EltTy == MVT::i1 ||
3839 !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
3840 return SDValue();
3841 MVT SrcVT = Src.getSimpleValueType();
3842 if (EltTy != SrcVT.getVectorElementType())
3843 return SDValue();
3844 SDValue Idx = SplatVal.getOperand(1);
3845 // The index must be a legal type.
3846 if (Idx.getValueType() != Subtarget.getXLenVT())
3847 return SDValue();
3848
3849 // Check that we know Idx lies within VT
3850 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3851 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3852 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3853 return SDValue();
3854 }
3855
3856 // Convert fixed length vectors to scalable
3857 MVT ContainerVT = VT;
3858 if (VT.isFixedLengthVector())
3859 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3860
3861 MVT SrcContainerVT = SrcVT;
3862 if (SrcVT.isFixedLengthVector()) {
3863 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3864 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3865 }
3866
3867 // Put Vec in a VT sized vector
3868 if (SrcContainerVT.getVectorMinNumElements() <
3869 ContainerVT.getVectorMinNumElements())
3870 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
3871 else
3872 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
3873
3874 // We checked that Idx fits inside VT earlier
3875 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3876 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3877 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3878 if (VT.isFixedLengthVector())
3879 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3880 return Gather;
3881}
3882
3883 static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
3884 const RISCVSubtarget &Subtarget) {
3885 MVT VT = Op.getSimpleValueType();
3886 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3887
3888 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3889
3890 SDLoc DL(Op);
3891 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3892
3893 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3894 int64_t StepNumerator = SimpleVID->StepNumerator;
3895 unsigned StepDenominator = SimpleVID->StepDenominator;
3896 int64_t Addend = SimpleVID->Addend;
3897
3898 assert(StepNumerator != 0 && "Invalid step");
3899 bool Negate = false;
3900 int64_t SplatStepVal = StepNumerator;
3901 unsigned StepOpcode = ISD::MUL;
3902 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3903 // anyway as the shift of 63 won't fit in uimm5.
3904 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3905 isPowerOf2_64(std::abs(StepNumerator))) {
3906 Negate = StepNumerator < 0;
3907 StepOpcode = ISD::SHL;
3908 SplatStepVal = Log2_64(std::abs(StepNumerator));
3909 }
3910
3911 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
3912 // since it's the immediate value many RVV instructions accept. There is
3913 // no vmul.vi instruction so ensure the multiply constant can fit in a
3914 // single addi instruction. For the addend, we allow up to 32 bits.
3915 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3916 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3917 isPowerOf2_32(StepDenominator) &&
3918 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
3919 MVT VIDVT =
3920 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3921 MVT VIDContainerVT =
3922 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3923 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3924 // Convert right out of the scalable type so we can use standard ISD
3925 // nodes for the rest of the computation. If we used scalable types with
3926 // these, we'd lose the fixed-length vector info and generate worse
3927 // vsetvli code.
3928 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3929 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3930 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3931 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3932 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3933 }
3934 if (StepDenominator != 1) {
3935 SDValue SplatStep =
3936 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3937 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3938 }
3939 if (Addend != 0 || Negate) {
3940 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3941 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3942 VID);
3943 }
3944 if (VT.isFloatingPoint()) {
3945 // TODO: Use vfwcvt to reduce register pressure.
3946 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3947 }
3948 return VID;
3949 }
3950 }
3951
3952 return SDValue();
3953}
3954
3955/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3956/// which constitute a large proportion of the elements. In such cases we can
3957/// splat a vector with the dominant element and make up the shortfall with
3958 /// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3959/// Note that this includes vectors of 2 elements by association. The
3960/// upper-most element is the "dominant" one, allowing us to use a splat to
3961/// "insert" the upper element, and an insert of the lower element at position
3962/// 0, which improves codegen.
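/// For example (illustrative): <2, 2, 2, 2, 3, 2, 2, undef> has dominant value
/// 2 (6 of 7 defined elements), so it is lowered as a splat of 2 followed by a
/// single insert of 3 at index 4.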
3963 static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3964 const RISCVSubtarget &Subtarget) {
3965 MVT VT = Op.getSimpleValueType();
3966 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3967
3968 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3969
3970 SDLoc DL(Op);
3971 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3972
3973 MVT XLenVT = Subtarget.getXLenVT();
3974 unsigned NumElts = Op.getNumOperands();
3975
3976 SDValue DominantValue;
3977 unsigned MostCommonCount = 0;
3978 DenseMap<SDValue, unsigned> ValueCounts;
3979 unsigned NumUndefElts =
3980 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3981
3982 // Track the number of scalar loads we know we'd be inserting, estimated as
3983 // any non-zero floating-point constant. Other kinds of element are either
3984 // already in registers or are materialized on demand. The threshold at which
3985 // a vector load is more desirable than several scalar materializion and
3986 // a vector load is more desirable than several scalar materialization and
3987 unsigned NumScalarLoads = 0;
3988
3989 for (SDValue V : Op->op_values()) {
3990 if (V.isUndef())
3991 continue;
3992
3993 unsigned &Count = ValueCounts[V];
3994 if (0 == Count)
3995 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3996 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3997
3998 // Is this value dominant? In case of a tie, prefer the highest element as
3999 // it's cheaper to insert near the beginning of a vector than it is at the
4000 // end.
4001 if (++Count >= MostCommonCount) {
4002 DominantValue = V;
4003 MostCommonCount = Count;
4004 }
4005 }
4006
4007 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
4008 unsigned NumDefElts = NumElts - NumUndefElts;
4009 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
4010
4011 // Don't perform this optimization when optimizing for size, since
4012 // materializing elements and inserting them tends to cause code bloat.
4013 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
4014 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
4015 ((MostCommonCount > DominantValueCountThreshold) ||
4016 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
4017 // Start by splatting the most common element.
4018 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
4019
4020 DenseSet<SDValue> Processed{DominantValue};
4021
4022 // We can handle an insert into the last element (of a splat) via
4023 // v(f)slide1down. This is slightly better than the vslideup insert
4024 // lowering as it avoids the need for a vector group temporary. It
4025 // is also better than using vmerge.vx as it avoids the need to
4026 // materialize the mask in a vector register.
4027 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
4028 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
4029 LastOp != DominantValue) {
4030 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4031 auto OpCode =
4032 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4033 if (!VT.isFloatingPoint())
4034 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
4035 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4036 LastOp, Mask, VL);
4037 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
4038 Processed.insert(LastOp);
4039 }
4040
4041 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
4042 for (const auto &OpIdx : enumerate(Op->ops())) {
4043 const SDValue &V = OpIdx.value();
4044 if (V.isUndef() || !Processed.insert(V).second)
4045 continue;
4046 if (ValueCounts[V] == 1) {
4047 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
4048 } else {
4049 // Blend in all instances of this value using a VSELECT, using a
4050 // mask where each bit signals whether that element is the one
4051 // we're after.
4052 SmallVector<SDValue> Ops;
4053 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
4054 return DAG.getConstant(V == V1, DL, XLenVT);
4055 });
4056 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
4057 DAG.getBuildVector(SelMaskTy, DL, Ops),
4058 DAG.getSplatBuildVector(VT, DL, V), Vec);
4059 }
4060 }
4061
4062 return Vec;
4063 }
4064
4065 return SDValue();
4066}
4067
4068 static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
4069 const RISCVSubtarget &Subtarget) {
4070 MVT VT = Op.getSimpleValueType();
4071 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4072
4073 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4074
4075 SDLoc DL(Op);
4076 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4077
4078 MVT XLenVT = Subtarget.getXLenVT();
4079 unsigned NumElts = Op.getNumOperands();
4080
4081 if (VT.getVectorElementType() == MVT::i1) {
4082 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
4083 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
4084 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
4085 }
4086
4087 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
4088 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
4089 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
4090 }
4091
4092 // Lower constant mask BUILD_VECTORs via an integer vector type, in
4093 // scalar integer chunks whose bit-width depends on the number of mask
4094 // bits and XLEN.
4095 // First, determine the most appropriate scalar integer type to use. This
4096 // is at most XLenVT, but may be shrunk to a smaller vector element type
4097 // according to the size of the final vector - use i8 chunks rather than
4098 // XLenVT if we're producing a v8i1. This results in more consistent
4099 // codegen across RV32 and RV64.
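// For example (illustrative): v8i1 <1,0,1,1,0,0,0,1> packs element i into bit
// i of an i8 chunk, giving 0b10001101 (0x8D), which is then bitcast back to
// the v8i1 mask type.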
4100 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4101 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4102 // If we have to use more than one INSERT_VECTOR_ELT then this
4103 // optimization is likely to increase code size; avoid performing it in
4104 // such a case. We can use a load from a constant pool in this case.
4105 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4106 return SDValue();
4107 // Now we can create our integer vector type. Note that it may be larger
4108 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4109 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4110 MVT IntegerViaVecVT =
4111 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4112 IntegerViaVecElts);
4113
4114 uint64_t Bits = 0;
4115 unsigned BitPos = 0, IntegerEltIdx = 0;
4116 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4117
4118 for (unsigned I = 0; I < NumElts;) {
4119 SDValue V = Op.getOperand(I);
4120 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4121 Bits |= ((uint64_t)BitValue << BitPos);
4122 ++BitPos;
4123 ++I;
4124
4125 // Once we accumulate enough bits to fill our scalar type or process the
4126 // last element, insert into our vector and clear our accumulated data.
4127 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4128 if (NumViaIntegerBits <= 32)
4129 Bits = SignExtend64<32>(Bits);
4130 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4131 Elts[IntegerEltIdx] = Elt;
4132 Bits = 0;
4133 BitPos = 0;
4134 IntegerEltIdx++;
4135 }
4136 }
4137
4138 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4139
4140 if (NumElts < NumViaIntegerBits) {
4141 // If we're producing a smaller vector than our minimum legal integer
4142 // type, bitcast to the equivalent (known-legal) mask type, and extract
4143 // our final mask.
4144 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4145 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4146 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4147 } else {
4148 // Else we must have produced an integer type with the same size as the
4149 // mask type; bitcast for the final result.
4150 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4151 Vec = DAG.getBitcast(VT, Vec);
4152 }
4153
4154 return Vec;
4155 }
4156
4157 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4158 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4159 : RISCVISD::VMV_V_X_VL;
4160 if (!VT.isFloatingPoint())
4161 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4162 Splat =
4163 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4164 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4165 }
4166
4167 // Try and match index sequences, which we can lower to the vid instruction
4168 // with optional modifications. An all-undef vector is matched by
4169 // getSplatValue, above.
4170 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4171 return Res;
4172
4173 // For very small build_vectors, use a single scalar insert of a constant.
4174 // TODO: Base this on constant rematerialization cost, not size.
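// For example (illustrative): v4i8 <1, 2, 3, 4> is 32 bits total, so it can be
// built as the single i32 constant 0x04030201 (element 0 in the low byte),
// inserted into element 0 of an integer vector and bitcast back to v4i8.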
4175 const unsigned EltBitSize = VT.getScalarSizeInBits();
4176 if (VT.getSizeInBits() <= 32 &&
4177 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
4178 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4179 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4180 "Unexpected sequence type");
4181 // If we can use the original VL with the modified element type, this
4182 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4183 // be moved into InsertVSETVLI?
4184 unsigned ViaVecLen =
4185 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4186 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4187
4188 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4189 uint64_t SplatValue = 0;
4190 // Construct the amalgamated value at this larger vector type.
4191 for (const auto &OpIdx : enumerate(Op->op_values())) {
4192 const auto &SeqV = OpIdx.value();
4193 if (!SeqV.isUndef())
4194 SplatValue |=
4195 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4196 }
4197
4198 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4199 // achieve better constant materialization.
4200 // On RV32, we need to sign-extend to use getSignedConstant.
4201 if (ViaIntVT == MVT::i32)
4202 SplatValue = SignExtend64<32>(SplatValue);
4203
4204 SDValue Vec = DAG.getInsertVectorElt(
4205 DL, DAG.getUNDEF(ViaVecVT),
4206 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4207 if (ViaVecLen != 1)
4208 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4209 return DAG.getBitcast(VT, Vec);
4210 }
4211
4212
4213 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4214 // when re-interpreted as a vector with a larger element type. For example,
4215 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4216 // could be instead splat as
4217 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4218 // TODO: This optimization could also work on non-constant splats, but it
4219 // would require bit-manipulation instructions to construct the splat value.
4220 SmallVector<SDValue> Sequence;
4221 const auto *BV = cast<BuildVectorSDNode>(Op);
4222 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4223 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
4224 BV->getRepeatedSequence(Sequence) &&
4225 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4226 unsigned SeqLen = Sequence.size();
4227 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4228 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4229 ViaIntVT == MVT::i64) &&
4230 "Unexpected sequence type");
4231
4232 // If we can use the original VL with the modified element type, this
4233 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4234 // be moved into InsertVSETVLI?
4235 const unsigned RequiredVL = NumElts / SeqLen;
4236 const unsigned ViaVecLen =
4237 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4238 NumElts : RequiredVL;
4239 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4240
4241 unsigned EltIdx = 0;
4242 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4243 uint64_t SplatValue = 0;
4244 // Construct the amalgamated value which can be splatted as this larger
4245 // vector type.
4246 for (const auto &SeqV : Sequence) {
4247 if (!SeqV.isUndef())
4248 SplatValue |=
4249 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4250 EltIdx++;
4251 }
4252
4253 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4254 // achieve better constant materialization.
4255 // On RV32, we need to sign-extend to use getSignedConstant.
4256 if (ViaIntVT == MVT::i32)
4257 SplatValue = SignExtend64<32>(SplatValue);
4258
4259 // Since we can't introduce illegal i64 types at this stage, we can only
4260 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4261 // way we can use RVV instructions to splat.
4262 assert((ViaIntVT.bitsLE(XLenVT) ||
4263 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4264 "Unexpected bitcast sequence");
4265 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4266 SDValue ViaVL =
4267 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4268 MVT ViaContainerVT =
4269 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4270 SDValue Splat =
4271 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4272 DAG.getUNDEF(ViaContainerVT),
4273 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4274 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4275 if (ViaVecLen != RequiredVL)
4276 Splat = DAG.getExtractSubvector(
4277 DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4278 return DAG.getBitcast(VT, Splat);
4279 }
4280 }
4281
4282 // If the number of signbits allows, see if we can lower as a <N x i8>.
4283 // Our main goal here is to reduce LMUL (and thus work) required to
4284 // build the constant, but we will also narrow if the resulting
4285 // narrow vector is known to materialize cheaply.
4286 // TODO: We really should be costing the smaller vector. There are
4287 // profitable cases this misses.
4288 if (EltBitSize > 8 && VT.isInteger() &&
4289 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4290 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4291 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4292 DL, Op->ops());
4293 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4294 Source, DAG, Subtarget);
4295 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4296 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4297 }
4298
4299 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4300 return Res;
4301
4302 // For constant vectors, use generic constant pool lowering. Otherwise,
4303 // we'd have to materialize constants in GPRs just to move them into the
4304 // vector.
4305 return SDValue();
4306}
4307
4308static unsigned getPACKOpcode(unsigned DestBW,
4309 const RISCVSubtarget &Subtarget) {
4310 switch (DestBW) {
4311 default:
4312 llvm_unreachable("Unsupported pack size");
4313 case 16:
4314 return RISCV::PACKH;
4315 case 32:
4316 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4317 case 64:
4318 assert(Subtarget.is64Bit());
4319 return RISCV::PACK;
4320 }
4321}
4322
4323/// Double the element size of the build vector to reduce the number
4324/// of vslide1down in the build vector chain. In the worst case, this
4325/// trades three scalar operations for 1 vector operation. Scalar
4326/// operations are generally lower latency, and for out-of-order cores
4327/// we also benefit from additional parallelism.
4328 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4329 const RISCVSubtarget &Subtarget) {
4330 SDLoc DL(Op);
4331 MVT VT = Op.getSimpleValueType();
4332 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4333 MVT ElemVT = VT.getVectorElementType();
4334 if (!ElemVT.isInteger())
4335 return SDValue();
4336
4337 // TODO: Relax these architectural restrictions, possibly with costing
4338 // of the actual instructions required.
4339 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4340 return SDValue();
4341
4342 unsigned NumElts = VT.getVectorNumElements();
4343 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4344 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4345 NumElts % 2 != 0)
4346 return SDValue();
4347
4348 // Produce [B,A] packed into a type twice as wide. Note that all
4349 // scalars are XLenVT, possibly masked (see below).
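// For example (illustrative): a v4i16 build_vector {a, b, c, d} becomes the
// v2i32 build_vector {(b << 16) | a, (d << 16) | c}, halving the number of
// vslide1down steps needed later.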
4350 MVT XLenVT = Subtarget.getXLenVT();
4351 SDValue Mask = DAG.getConstant(
4352 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4353 auto pack = [&](SDValue A, SDValue B) {
4354 // Bias the scheduling of the inserted operations to near the
4355 // definition of the element - this tends to reduce register
4356 // pressure overall.
4357 SDLoc ElemDL(B);
4358 if (Subtarget.hasStdExtZbkb())
4359 // Note that we're relying on the high bits of the result being
4360 // don't care. For PACKW, the result is *sign* extended.
4361 return SDValue(
4362 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4363 ElemDL, XLenVT, A, B),
4364 0);
4365
4366 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4367 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4368 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4369 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4370 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4371 SDNodeFlags::Disjoint);
4372 };
4373
4374 SmallVector<SDValue> NewOperands;
4375 NewOperands.reserve(NumElts / 2);
4376 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4377 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4378 assert(NumElts == NewOperands.size() * 2);
4379 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4380 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4381 return DAG.getNode(ISD::BITCAST, DL, VT,
4382 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4383}
4384
4385 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4386 const RISCVSubtarget &Subtarget) {
4387 MVT VT = Op.getSimpleValueType();
4388 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4389
4390 MVT EltVT = VT.getVectorElementType();
4391 MVT XLenVT = Subtarget.getXLenVT();
4392
4393 SDLoc DL(Op);
4394
4395 // Proper support for f16 requires Zvfh. bf16 always requires special
4396 // handling. We need to cast the scalar to integer and create an integer
4397 // build_vector.
4398 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4399 MVT IVT = VT.changeVectorElementType(MVT::i16);
4400 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4401 for (const auto &[I, U] : enumerate(Op->ops())) {
4402 SDValue Elem = U.get();
4403 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4404 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4405 // Called by LegalizeDAG, we need to use XLenVT operations since we
4406 // can't create illegal types.
4407 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4408 // Manually constant fold so the integer build_vector can be lowered
4409 // better. Waiting for DAGCombine will be too late.
4410 APInt V =
4411 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4412 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4413 } else {
4414 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4415 }
4416 } else {
4417 // Called by scalar type legalizer, we can use i16.
4418 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4419 }
4420 }
4421 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4422 return DAG.getBitcast(VT, Res);
4423 }
4424
4425 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4426 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4427 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4428
4429 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4430
4431 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4432
4433 if (VT.getVectorElementType() == MVT::i1) {
4434 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4435 // vector type, we have a legal equivalently-sized i8 type, so we can use
4436 // that.
4437 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4438 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4439
4440 SDValue WideVec;
4441 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4442 // For a splat, perform a scalar truncate before creating the wider
4443 // vector.
4444 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4445 DAG.getConstant(1, DL, Splat.getValueType()));
4446 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4447 } else {
4448 SmallVector<SDValue, 8> Ops(Op->op_values());
4449 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4450 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4451 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4452 }
4453
4454 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4455 }
4456
4457 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4458 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4459 return Gather;
4460
4461 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4462 // pressure at high LMUL.
4463 if (all_of(Op->ops().drop_front(),
4464 [](const SDUse &U) { return U.get().isUndef(); })) {
4465 unsigned Opc =
4466 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4467 if (!VT.isFloatingPoint())
4468 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4469 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4470 Splat, VL);
4471 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4472 }
4473
4474 unsigned Opc =
4475 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4476 if (!VT.isFloatingPoint())
4477 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4478 Splat =
4479 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4480 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4481 }
4482
4483 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4484 return Res;
4485
4486 // If we're compiling for an exact VLEN value, we can split our work per
4487 // register in the register group.
4488 if (const auto VLen = Subtarget.getRealVLen();
4489 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4490 MVT ElemVT = VT.getVectorElementType();
4491 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4492 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4493 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4494 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4495 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4496
4497 // The following semantically builds up a fixed length concat_vector
4498 // of the component build_vectors. We eagerly lower to scalable and
4499 // insert_subvector here to avoid DAG combining it back to a large
4500 // build_vector.
4501 SmallVector<SDValue> BuildVectorOps(Op->ops());
4502 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4503 SDValue Vec = DAG.getUNDEF(ContainerVT);
4504 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4505 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4506 SDValue SubBV =
4507 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4508 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4509 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4510 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4511 }
4512 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4513 }
4514
4515 // If we're about to resort to vslide1down (or stack usage), pack our
4516 // elements into the widest scalar type we can. This will force a VL/VTYPE
4517 // toggle, but reduces the critical path, the number of vslide1down ops
4518 // required, and possibly enables scalar folds of the values.
4519 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4520 return Res;
4521
4522 // For m1 vectors, if we have non-undef values in both halves of our vector,
4523 // split the vector into low and high halves, build them separately, then
4524 // use a vselect to combine them. For long vectors, this cuts the critical
4525 // path of the vslide1down sequence in half, and gives us an opportunity
4526 // to special case each half independently. Note that we don't change the
4527 // length of the sub-vectors here, so if both fallback to the generic
4528 // vslide1down path, we should be able to fold the vselect into the final
4529 // vslidedown (for the undef tail) for the first half w/ masking.
4530 unsigned NumElts = VT.getVectorNumElements();
4531 unsigned NumUndefElts =
4532 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4533 unsigned NumDefElts = NumElts - NumUndefElts;
4534 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4535 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4536 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4537 SmallVector<SDValue> MaskVals;
4538 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4539 SubVecAOps.reserve(NumElts);
4540 SubVecBOps.reserve(NumElts);
4541 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4542 SDValue Elem = U.get();
4543 if (Idx < NumElts / 2) {
4544 SubVecAOps.push_back(Elem);
4545 SubVecBOps.push_back(UndefElem);
4546 } else {
4547 SubVecAOps.push_back(UndefElem);
4548 SubVecBOps.push_back(Elem);
4549 }
4550 bool SelectMaskVal = (Idx < NumElts / 2);
4551 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4552 }
4553 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4554 MaskVals.size() == NumElts);
4555
4556 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4557 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4558 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4559 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4560 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4561 }
4562
4563 // Cap the cost at a value linear to the number of elements in the vector.
4564 // The default lowering is to use the stack. The vector store + scalar loads
4565 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4566 // being (at least) linear in LMUL. As a result, using the vslidedown
4567 // lowering for every element ends up being VL*LMUL.
4568 // TODO: Should we be directly costing the stack alternative? Doing so might
4569 // give us a more accurate upper bound.
4570 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4571
4572 // TODO: unify with TTI getSlideCost.
4573 InstructionCost PerSlideCost = 1;
4574 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4575 default: break;
4576 case RISCVVType::LMUL_2:
4577 PerSlideCost = 2;
4578 break;
4579 case RISCVVType::LMUL_4:
4580 PerSlideCost = 4;
4581 break;
4582 case RISCVVType::LMUL_8:
4583 PerSlideCost = 8;
4584 break;
4585 }
4586
4587 // TODO: Should we be using the build instseq then cost + evaluate scheme
4588 // we use for integer constants here?
4589 unsigned UndefCount = 0;
4590 for (const SDValue &V : Op->ops()) {
4591 if (V.isUndef()) {
4592 UndefCount++;
4593 continue;
4594 }
4595 if (UndefCount) {
4596 LinearBudget -= PerSlideCost;
4597 UndefCount = 0;
4598 }
4599 LinearBudget -= PerSlideCost;
4600 }
4601 if (UndefCount) {
4602 LinearBudget -= PerSlideCost;
4603 }
4604
4605 if (LinearBudget < 0)
4606 return SDValue();
4607
4608 assert((!VT.isFloatingPoint() ||
4609 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4610 "Illegal type which will result in reserved encoding");
4611
4612 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4613
4614 // General case: splat the first operand and slide other operands down one
4615 // by one to form a vector. Alternatively, if every operand is an
4616 // extraction from element 0 of a vector, we use that vector from the last
4617 // extraction as the start value and slide up instead of down, so that (1) we
4618 // can avoid the initial splat and (2) we can later turn those vslide1up ops
4619 // into a vslideup of 1 and eliminate the vector-to-scalar movement, which is
4620 // something we cannot do with vslide1down/vslidedown.
4621 // Of course, using vslide1up/vslideup might increase the register pressure,
4622 // and that's why we conservatively limit to cases where every operand is an
4623 // extraction from the first element.
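// For example (illustrative): a non-constant v4i32 {a, b, c, d} is lowered as
// a splat of a followed by three vslide1down steps feeding b, c and d; runs of
// undef elements are instead skipped with a single vslidedown (or vslideup) by
// the run length.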
4624 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4625 SDValue EVec;
4626 bool SlideUp = false;
4627 auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4628 SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4629 if (SlideUp)
4630 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4631 Mask, VL, Policy);
4632 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4633 Mask, VL, Policy);
4634 };
4635
4636 // The reason we don't use all_of here is because we're also capturing EVec
4637 // from the last non-undef operand. If the std::execution_policy of the
4638 // underlying std::all_of is anything but std::sequenced_policy we might
4639 // capture the wrong EVec.
4640 for (SDValue V : Operands) {
4641 using namespace SDPatternMatch;
4642 SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4643 if (!SlideUp)
4644 break;
4645 }
4646
4647 // Do not slideup if the element type of EVec is different.
4648 if (SlideUp) {
4649 MVT EVecEltVT = EVec.getSimpleValueType().getVectorElementType();
4650 MVT ContainerEltVT = ContainerVT.getVectorElementType();
4651 if (EVecEltVT != ContainerEltVT)
4652 SlideUp = false;
4653 }
4654
4655 if (SlideUp) {
4656 MVT EVecContainerVT = EVec.getSimpleValueType();
4657 // Make sure the original vector has scalable vector type.
4658 if (EVecContainerVT.isFixedLengthVector()) {
4659 EVecContainerVT =
4660 getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4661 EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4662 }
4663
4664 // Adapt EVec's type into ContainerVT.
4665 if (EVecContainerVT.getVectorMinNumElements() <
4666 ContainerVT.getVectorMinNumElements())
4667 EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4668 else
4669 EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4670
4671 // Reverse the elements as we're going to slide up from the last element.
4672 std::reverse(Operands.begin(), Operands.end());
4673 }
4674
4675 SDValue Vec;
4676 UndefCount = 0;
4677 for (SDValue V : Operands) {
4678 if (V.isUndef()) {
4679 UndefCount++;
4680 continue;
4681 }
4682
4683 // Start our sequence with either a TA splat or extract source in the
4684 // hopes that hardware is able to recognize there's no dependency on the
4685 // prior value of our temporary register.
4686 if (!Vec) {
4687 if (SlideUp) {
4688 Vec = EVec;
4689 } else {
4690 Vec = DAG.getSplatVector(VT, DL, V);
4691 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4692 }
4693
4694 UndefCount = 0;
4695 continue;
4696 }
4697
4698 if (UndefCount) {
4699 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4700 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4701 VL);
4702 UndefCount = 0;
4703 }
4704
4705 unsigned Opcode;
4706 if (VT.isFloatingPoint())
4707 Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4708 else
4709 Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4710
4711 if (!VT.isFloatingPoint())
4712 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4713 Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4714 V, Mask, VL);
4715 }
4716 if (UndefCount) {
4717 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4718 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4719 VL);
4720 }
4721 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4722}
4723
4724 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4725 SDValue Lo, SDValue Hi, SDValue VL,
4726 SelectionDAG &DAG) {
4727 if (!Passthru)
4728 Passthru = DAG.getUNDEF(VT);
4729 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4730 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4731 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4732 // If every bit of the Hi constant equals the sign bit of Lo, lower this as a
4733 // custom node in order to try and match RVV vector/scalar instructions.
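// For example (illustrative): splatting the i64 value -1 on RV32 gives
// Lo = Hi = 0xFFFFFFFF, and (LoC >> 31) == -1 == HiC, so a single vmv.v.x of
// Lo suffices because the scalar operand is sign-extended up to SEW=64.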
4734 if ((LoC >> 31) == HiC)
4735 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4736
4737 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4738 // VL. This can temporarily increase VL if VL is less than VLMAX.
4739 if (LoC == HiC) {
4740 SDValue NewVL;
4741 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4742 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4743 else
4744 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4745 MVT InterVT =
4746 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4747 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4748 DAG.getUNDEF(InterVT), Lo, NewVL);
4749 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4750 }
4751 }
4752
4753 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4754 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4755 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4756 Hi.getConstantOperandVal(1) == 31)
4757 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4758
4759 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4760 // even if it might be sign extended.
4761 if (Hi.isUndef())
4762 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4763
4764 // Fall back to a stack store and stride x0 vector load.
4765 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4766 Hi, VL);
4767}
4768
4769// Called by type legalization to handle splat of i64 on RV32.
4770// FIXME: We can optimize this when the type has sign or zero bits in one
4771// of the halves.
4772static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4773 SDValue Scalar, SDValue VL,
4774 SelectionDAG &DAG) {
4775 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4776 SDValue Lo, Hi;
4777 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4778 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4779}
4780
4781 // This function lowers a splat of a scalar operand Scalar with the vector
4782// length VL. It ensures the final sequence is type legal, which is useful when
4783// lowering a splat after type legalization.
4784static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4785 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4786 const RISCVSubtarget &Subtarget) {
4787 bool HasPassthru = Passthru && !Passthru.isUndef();
4788 if (!HasPassthru && !Passthru)
4789 Passthru = DAG.getUNDEF(VT);
4790
4791 MVT EltVT = VT.getVectorElementType();
4792 MVT XLenVT = Subtarget.getXLenVT();
4793
4794 if (VT.isFloatingPoint()) {
4795 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4796 EltVT == MVT::bf16) {
4797 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4798 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4799 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4800 else
4801 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4802 MVT IVT = VT.changeVectorElementType(MVT::i16);
4803 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4804 SDValue Splat =
4805 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4806 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4807 }
4808 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4809 }
4810
4811 // Simplest case is that the operand needs to be promoted to XLenVT.
4812 if (Scalar.getValueType().bitsLE(XLenVT)) {
4813 // If the operand is a constant, sign extend to increase our chances
4814 // of being able to use a .vi instruction. ANY_EXTEND would become a
4815 // zero extend and the simm5 check in isel would fail.
4816 // FIXME: Should we ignore the upper bits in isel instead?
4817 unsigned ExtOpc =
4818 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4819 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4820 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4821 }
4822
4823 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4824 "Unexpected scalar for splat lowering!");
4825
4826 if (isOneConstant(VL) && isNullConstant(Scalar))
4827 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4828 DAG.getConstant(0, DL, XLenVT), VL);
4829
4830 // Otherwise use the more complicated splatting algorithm.
4831 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4832}
4833
4834// This function lowers an insert of a scalar operand Scalar into lane
4835// 0 of the vector regardless of the value of VL. The contents of the
4836// remaining lanes of the result vector are unspecified. VL is assumed
4837// to be non-zero.
4838 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4839 const SDLoc &DL, SelectionDAG &DAG,
4840 const RISCVSubtarget &Subtarget) {
4841 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4842
4843 const MVT XLenVT = Subtarget.getXLenVT();
4844 SDValue Passthru = DAG.getUNDEF(VT);
4845
4846 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4847 isNullConstant(Scalar.getOperand(1))) {
4848 SDValue ExtractedVal = Scalar.getOperand(0);
4849 // The element types must be the same.
4850 if (ExtractedVal.getValueType().getVectorElementType() ==
4851 VT.getVectorElementType()) {
4852 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4853 MVT ExtractedContainerVT = ExtractedVT;
4854 if (ExtractedContainerVT.isFixedLengthVector()) {
4855 ExtractedContainerVT = getContainerForFixedLengthVector(
4856 DAG, ExtractedContainerVT, Subtarget);
4857 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4858 ExtractedVal, DAG, Subtarget);
4859 }
4860 if (ExtractedContainerVT.bitsLE(VT))
4861 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
4862 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
4863 }
4864 }
4865
4866 if (VT.isFloatingPoint())
4867 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4868 VL);
4869
4870 // Avoid the tricky legalization cases by falling back to using the
4871 // splat code which already handles it gracefully.
4872 if (!Scalar.getValueType().bitsLE(XLenVT))
4873 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4874 DAG.getConstant(1, DL, XLenVT),
4875 VT, DL, DAG, Subtarget);
4876
4877 // If the operand is a constant, sign extend to increase our chances
4878 // of being able to use a .vi instruction. ANY_EXTEND would become a
4879 // zero extend and the simm5 check in isel would fail.
4880 // FIXME: Should we ignore the upper bits in isel instead?
4881 unsigned ExtOpc =
4882 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4883 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4884 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4885 VL);
4886}
4887
4888/// If concat_vector(V1,V2) could be folded away to some existing
4889/// vector source, return it. Note that the source may be larger
4890 /// than the requested concat_vector (i.e. an extract_subvector
4891 /// might be required).
4892 static SDValue foldConcatVector(SDValue V1, SDValue V2) {
4893 EVT VT = V1.getValueType();
4894 assert(VT == V2.getValueType() && "argument types must match");
4895 // Both input must be extracts.
4896 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4897 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4898 return SDValue();
4899
4900 // Extracting from the same source.
4901 SDValue Src = V1.getOperand(0);
4902 if (Src != V2.getOperand(0) ||
4903 VT.isScalableVector() != Src.getValueType().isScalableVector())
4904 return SDValue();
4905
4906 // The extracts must extract the two halves of the source.
4907 if (V1.getConstantOperandVal(1) != 0 ||
4908 V2.getConstantOperandVal(1) != VT.getVectorMinNumElements())
4909 return SDValue();
4910
4911 return Src;
4912}
4913
4914// Can this shuffle be performed on exactly one (possibly larger) input?
4915 static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
4916
4917 if (V2.isUndef())
4918 return V1;
4919
4920 unsigned NumElts = VT.getVectorNumElements();
4921 // Src needs to have twice the number of elements.
4922 // TODO: Update shuffle lowering to add the extract subvector
4923 if (SDValue Src = foldConcatVector(V1, V2);
4924 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
4925 return Src;
4926
4927 return SDValue();
4928}
4929
4930/// Is this shuffle interleaving contiguous elements from one vector into the
4931/// even elements and contiguous elements from another vector into the odd
4932/// elements. \p EvenSrc will contain the element that should be in the first
4933/// even element. \p OddSrc will contain the element that should be in the first
4934/// odd element. These can be the first element in a source or the element half
4935 /// odd element. These can be the first element in a source or the element
4936 /// halfway through the source.
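/// For example (illustrative), for v8i32 operands:
///   mask <0,8,1,9,2,10,3,11> -> EvenSrc = 0, OddSrc = 8 (two-source interleave)
///   mask <0,4,1,5,2,6,3,7>   -> EvenSrc = 0, OddSrc = 4 (unary interleave of
///                               the low and high halves of the first vector)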
4937 int &OddSrc, const RISCVSubtarget &Subtarget) {
4938 // We need to be able to widen elements to the next larger integer type or
4939 // use the zip2a instruction at e64.
4940 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
4941 !Subtarget.hasVendorXRivosVizip())
4942 return false;
4943
4944 int Size = Mask.size();
4945 int NumElts = VT.getVectorNumElements();
4946 assert(Size == (int)NumElts && "Unexpected mask size");
4947
4948 SmallVector<unsigned, 2> StartIndexes;
4949 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4950 return false;
4951
4952 EvenSrc = StartIndexes[0];
4953 OddSrc = StartIndexes[1];
4954
4955 // One source should be low half of first vector.
4956 if (EvenSrc != 0 && OddSrc != 0)
4957 return false;
4958
4959 // Subvectors will be extracted from either the start of the two input
4960 // vectors, or the start and middle of the first vector if it's a unary
4961 // interleave.
4962 // In both cases, HalfNumElts will be extracted.
4963 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4964 // we'll create an illegal extract_subvector.
4965 // FIXME: We could support other values using a slidedown first.
4966 int HalfNumElts = NumElts / 2;
4967 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4968}
4969
4970/// Is this mask representing a masked combination of two slides?
4971static bool isMaskedSlidePair(ArrayRef<int> Mask,
4972 std::array<std::pair<int, int>, 2> &SrcInfo) {
4973 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
4974 return false;
4975
4976 // Avoid matching vselect idioms
4977 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
4978 return false;
4979 // Prefer vslideup as the second instruction, and identity
4980 // only as the initial instruction.
4981 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
4982 SrcInfo[1].second == 0)
4983 std::swap(SrcInfo[0], SrcInfo[1]);
4984 assert(SrcInfo[0].first != -1 && "Must find one slide");
4985 return true;
4986}
4987
4988// Exactly matches the semantics of a previously existing custom matcher
4989// to allow migration to new matcher without changing output.
4990static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
4991 unsigned NumElts) {
4992 if (SrcInfo[1].first == -1)
4993 return true;
4994 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
4995 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
4996}
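// Worked example (illustrative only): for a v4i8 shuffle with mask
// <2, 3, 4, 5>, the canonicalized SrcInfo is {{0, -2}, {1, +2}}: a slide
// down by 2 of the first source merged with a slide up by 2 of the second.
// Since +2 - (-2) == NumElts, isElementRotate also classifies this mask as a
// whole-vector element rotate.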
4997
4998static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
4999 ArrayRef<int> Mask, unsigned Factor,
5000 bool RequiredPolarity) {
5001 int NumElts = Mask.size();
5002 for (const auto &[Idx, M] : enumerate(Mask)) {
5003 if (M < 0)
5004 continue;
5005 int Src = M >= NumElts;
5006 int Diff = (int)Idx - (M % NumElts);
5007 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
5008 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
5009 "Must match exactly one of the two slides");
5010 if (RequiredPolarity != (C == (Idx / Factor) % 2))
5011 return false;
5012 }
5013 return true;
5014}
5015
5016/// Given a shuffle which can be represented as a pair of two slides,
5017/// see if it is a zipeven idiom. Zipeven is:
5018/// vs2: a0 a1 a2 a3
5019/// vs1: b0 b1 b2 b3
5020/// vd: a0 b0 a2 b2
5021static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
5022 ArrayRef<int> Mask, unsigned &Factor) {
5023 Factor = SrcInfo[1].second;
5024 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
5025 Mask.size() % Factor == 0 &&
5026 isAlternating(SrcInfo, Mask, Factor, true);
5027}
5028
5029/// Given a shuffle which can be represented as a pair of two slides,
5030/// see if it is a zipodd idiom. Zipodd is:
5031/// vs2: a0 a1 a2 a3
5032/// vs1: b0 b1 b2 b3
5033/// vd: a1 b1 a3 b3
5034/// Note that the operand order is swapped due to the way we canonicalize
5035/// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
5036static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
5037 ArrayRef<int> Mask, unsigned &Factor) {
5038 Factor = -SrcInfo[1].second;
5039 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
5040 Mask.size() % Factor == 0 &&
5041 isAlternating(SrcInfo, Mask, Factor, false);
5042}
5043
5044// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
5045// 2, 4, or 8, and the integer type Factor-times larger than VT's
5046// element type must be a legal element type.
5047// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
5048// -> [p, q, r, s] (Factor=2, Index=1)
5049static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
5050 SDValue Src, unsigned Factor,
5051 unsigned Index, SelectionDAG &DAG) {
5052 unsigned EltBits = VT.getScalarSizeInBits();
5053 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
5054 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
5055 SrcEC.divideCoefficientBy(Factor));
5056 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
5057 SrcEC.divideCoefficientBy(Factor));
5058 Src = DAG.getBitcast(WideSrcVT, Src);
5059
5060 unsigned Shift = Index * EltBits;
5061 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
5062 DAG.getConstant(Shift, DL, WideSrcVT));
5063 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
5064 MVT CastVT = ResVT.changeVectorElementType(VT.getVectorElementType());
5065 Res = DAG.getBitcast(CastVT, Res);
5066 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
5067}
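// Worked example (illustrative only): for a single-source v8i8 deinterleave
// with Factor = 2, Index = 1 (mask <1, 3, 5, 7, -1, -1, -1, -1>), the source
// is bitcast to v4i16, shifted right by 8 so each odd byte lands in the low
// byte of its pair, truncated back to v4i8, and reinserted at element 0 of an
// undef v8i8 result.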
5068
5069/// Match a single source shuffle which is an identity except that some
5070/// particular element is repeated. This can be lowered as a masked
5071/// vrgather.vi/vx. Note that the two source form of this is handled
5072/// by the recursive splitting logic and doesn't need special handling.
5073static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN,
5074 const RISCVSubtarget &Subtarget,
5075 SelectionDAG &DAG) {
5076
5077 SDLoc DL(SVN);
5078 MVT VT = SVN->getSimpleValueType(0);
5079 SDValue V1 = SVN->getOperand(0);
5080 assert(SVN->getOperand(1).isUndef());
5081 ArrayRef<int> Mask = SVN->getMask();
5082 const unsigned NumElts = VT.getVectorNumElements();
5083 MVT XLenVT = Subtarget.getXLenVT();
5084
5085 std::optional<int> SplatIdx;
5086 for (auto [I, M] : enumerate(Mask)) {
5087 if (M == -1 || I == (unsigned)M)
5088 continue;
5089 if (SplatIdx && *SplatIdx != M)
5090 return SDValue();
5091 SplatIdx = M;
5092 }
5093
5094 if (!SplatIdx)
5095 return SDValue();
5096
5097 SmallVector<SDValue> MaskVals;
5098 for (int MaskIndex : Mask) {
5099 bool SelectMaskVal = MaskIndex == *SplatIdx;
5100 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5101 }
5102 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5103 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5104 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5105 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
5106 SmallVector<int>(NumElts, *SplatIdx));
5107 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
5108}
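// Worked example (illustrative only): a v4i32 shuffle with mask <0, 3, 2, 3>
// is an identity except that element 1 repeats element 3, so SplatIdx = 3.
// It becomes a splat of element 3 vselect'ed over V1 under the select mask
// <0, 1, 0, 1>, matching the masked vrgather.vi/vx form described above.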
5109
5110// Lower the following shuffle to vslidedown.
5111// a)
5112// t49: v8i8 = extract_subvector t13, Constant:i64<0>
5113// t109: v8i8 = extract_subvector t13, Constant:i64<8>
5114// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5115// b)
5116// t69: v16i16 = extract_subvector t68, Constant:i64<0>
5117// t23: v8i16 = extract_subvector t69, Constant:i64<0>
5118// t29: v4i16 = extract_subvector t23, Constant:i64<4>
5119// t26: v8i16 = extract_subvector t69, Constant:i64<8>
5120// t30: v4i16 = extract_subvector t26, Constant:i64<0>
5121// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
5122static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
5123 SDValue V1, SDValue V2,
5124 ArrayRef<int> Mask,
5125 const RISCVSubtarget &Subtarget,
5126 SelectionDAG &DAG) {
5127 auto findNonEXTRACT_SUBVECTORParent =
5128 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
5129 uint64_t Offset = 0;
5130 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5131 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
5132 // a scalable vector. But we don't want to match the case.
5133 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
5134 Offset += Parent.getConstantOperandVal(1);
5135 Parent = Parent.getOperand(0);
5136 }
5137 return std::make_pair(Parent, Offset);
5138 };
5139
5140 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
5141 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
5142
5143 // Extracting from the same source.
5144 SDValue Src = V1Src;
5145 if (Src != V2Src)
5146 return SDValue();
5147
5148 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
5149 SmallVector<int, 16> NewMask(Mask);
5150 for (size_t i = 0; i != NewMask.size(); ++i) {
5151 if (NewMask[i] == -1)
5152 continue;
5153
5154 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
5155 NewMask[i] = NewMask[i] + V1IndexOffset;
5156 } else {
5157 // Minus NewMask.size() is needed. Otherwise, the b case would be
5158 // <5,6,7,12> instead of <5,6,7,8>.
5159 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
5160 }
5161 }
5162
5163 // First index must be known and non-zero. It will be used as the slidedown
5164 // amount.
5165 if (NewMask[0] <= 0)
5166 return SDValue();
5167
5168 // NewMask is also continuous.
5169 for (unsigned i = 1; i != NewMask.size(); ++i)
5170 if (NewMask[i - 1] + 1 != NewMask[i])
5171 return SDValue();
5172
5173 MVT XLenVT = Subtarget.getXLenVT();
5174 MVT SrcVT = Src.getSimpleValueType();
5175 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5176 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5177 SDValue Slidedown =
5178 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5179 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5180 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5181 return DAG.getExtractSubvector(
5182 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5183}
5184
5185// Because vslideup leaves the destination elements at the start intact, we can
5186// use it to perform shuffles that insert subvectors:
5187//
5188// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5189// ->
5190// vsetvli zero, 8, e8, mf2, ta, ma
5191// vslideup.vi v8, v9, 4
5192//
5193// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5194// ->
5195// vsetvli zero, 5, e8, mf2, tu, ma
5196// vslideup.vi v8, v9, 2
5197static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
5198 SDValue V1, SDValue V2,
5199 ArrayRef<int> Mask,
5200 const RISCVSubtarget &Subtarget,
5201 SelectionDAG &DAG) {
5202 unsigned NumElts = VT.getVectorNumElements();
5203 int NumSubElts, Index;
5204 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5205 Index))
5206 return SDValue();
5207
5208 bool OpsSwapped = Mask[Index] < (int)NumElts;
5209 SDValue InPlace = OpsSwapped ? V2 : V1;
5210 SDValue ToInsert = OpsSwapped ? V1 : V2;
5211
5212 MVT XLenVT = Subtarget.getXLenVT();
5213 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5214 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5215 // We slide up by the index that the subvector is being inserted at, and set
5216 // VL to the index + the number of elements being inserted.
5217 unsigned Policy =
5218 RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
5219 // If we're adding a suffix to the in place vector, i.e. inserting right
5220 // up to the very end of it, then we don't actually care about the tail.
5221 if (NumSubElts + Index >= (int)NumElts)
5222 Policy |= RISCVVType::TAIL_AGNOSTIC;
5223
5224 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5225 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5226 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5227
5228 SDValue Res;
5229 // If we're inserting into the lowest elements, use a tail undisturbed
5230 // vmv.v.v.
5231 if (Index == 0)
5232 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5233 VL);
5234 else
5235 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5236 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5237 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5238}
5239
5240/// Match v(f)slide1up/down idioms. These operations involve sliding
5241/// N-1 elements to make room for an inserted scalar at one end.
5242static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
5243 SDValue V1, SDValue V2,
5244 ArrayRef<int> Mask,
5245 const RISCVSubtarget &Subtarget,
5246 SelectionDAG &DAG) {
5247 bool OpsSwapped = false;
5248 if (!isa<BuildVectorSDNode>(V1)) {
5249 if (!isa<BuildVectorSDNode>(V2))
5250 return SDValue();
5251 std::swap(V1, V2);
5252 OpsSwapped = true;
5253 }
5254 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5255 if (!Splat)
5256 return SDValue();
5257
5258 // Return true if the mask could describe a slide of Mask.size() - 1
5259 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5260 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5261 const unsigned S = (Offset > 0) ? 0 : -Offset;
5262 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5263 for (unsigned i = S; i != E; ++i)
5264 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5265 return false;
5266 return true;
5267 };
5268
5269 const unsigned NumElts = VT.getVectorNumElements();
5270 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5271 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5272 return SDValue();
5273
5274 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5275 // The inserted lane must come from the splat; an undef scalar is legal but
5276 // not profitable.
5276 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5277 return SDValue();
5278
5279 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5280 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5281
5282 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5283 // vslide1{down,up}.vx instead.
5284 if (VT.getVectorElementType() == MVT::bf16 ||
5285 (VT.getVectorElementType() == MVT::f16 &&
5286 !Subtarget.hasVInstructionsF16())) {
5287 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5288 Splat =
5289 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5290 V2 = DAG.getBitcast(
5291 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5292 SDValue Vec = DAG.getNode(
5293 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5294 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5295 Vec = DAG.getBitcast(ContainerVT, Vec);
5296 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5297 }
5298
5299 auto OpCode = IsVSlidedown ?
5300 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5301 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5302 if (!VT.isFloatingPoint())
5303 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5304 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5305 DAG.getUNDEF(ContainerVT),
5306 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5307 Splat, TrueMask, VL);
5308 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5309}
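// Worked example (illustrative only): shuffle(V1:v4i32, splat(X):v4i32,
// <4, 0, 1, 2>) places X in element 0 followed by the first three elements
// of V1, which is exactly vslide1up.vx (or vfslide1up.vf) of V1 by the
// splatted scalar X.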
5310
5311/// Match a mask which "spreads" the leading elements of a vector evenly
5312/// across the result. Factor is the spread amount, and Index is the
5313/// offset applied (on success, Index < Factor). This is the inverse
5314/// of a deinterleave with the same Factor and Index. This is analogous
5315/// to an interleave, except that all but one lane is undef.
5316bool RISCVTargetLowering::isSpreadMask(ArrayRef<int> Mask, unsigned Factor,
5317 unsigned &Index) {
5318 SmallVector<bool> LaneIsUndef(Factor, true);
5319 for (unsigned i = 0; i < Mask.size(); i++)
5320 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5321
5322 bool Found = false;
5323 for (unsigned i = 0; i < Factor; i++) {
5324 if (LaneIsUndef[i])
5325 continue;
5326 if (Found)
5327 return false;
5328 Index = i;
5329 Found = true;
5330 }
5331 if (!Found)
5332 return false;
5333
5334 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5335 unsigned j = i * Factor + Index;
5336 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5337 return false;
5338 }
5339 return true;
5340}
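// Worked example (illustrative only): with Factor = 4, the v8 mask
// <-1, 0, -1, -1, -1, 1, -1, -1> spreads source elements 0 and 1 into lanes
// 1 and 5, so the match succeeds with Index = 1; every other lane within
// each factor-4 group is undef.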
5341
5342static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5343 const SDLoc &DL, SelectionDAG &DAG,
5344 const RISCVSubtarget &Subtarget) {
5345 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5346 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5347 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5349
5350 MVT VT = Op0.getSimpleValueType();
5351 MVT IntVT = VT.changeVectorElementTypeToInteger();
5352 Op0 = DAG.getBitcast(IntVT, Op0);
5353 Op1 = DAG.getBitcast(IntVT, Op1);
5354
5355 MVT ContainerVT = IntVT;
5356 if (VT.isFixedLengthVector()) {
5357 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5358 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5359 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5360 }
5361
5362 MVT InnerVT = ContainerVT;
5363 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5364 if (Op1.isUndef() &&
5365 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5366 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5367 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5368 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5369 Subtarget.getXLenVT());
5370 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5371 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5372 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5373 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5374 }
5375
5376 SDValue Passthru = DAG.getUNDEF(InnerVT);
5377 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
5378 if (InnerVT.bitsLT(ContainerVT))
5379 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5380 if (IntVT.isFixedLengthVector())
5381 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5382 Res = DAG.getBitcast(VT, Res);
5383 return Res;
5384}
5385
5386// Given a vector a, b, c, d return a vector Factor times longer
5387// with Factor-1 undef's between elements. Ex:
5388// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5389// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5390static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5391 const SDLoc &DL, SelectionDAG &DAG) {
5392
5393 MVT VT = V.getSimpleValueType();
5394 unsigned EltBits = VT.getScalarSizeInBits();
5395 ElementCount EC = VT.getVectorElementCount();
5396 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5397
5398 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5399
5400 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5401 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5402 // allow the SHL to fold away if Index is 0.
5403 if (Index != 0)
5404 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5405 DAG.getConstant(EltBits * Index, DL, WideVT));
5406 // Make sure to use original element type
5407 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
5408 EC.multiplyCoefficientBy(Factor));
5409 return DAG.getBitcast(ResultVT, Result);
5410}
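// Worked example (illustrative only): spreading a v4i8 {a, b, c, d} with
// Factor = 2, Index = 1 zero-extends to v4i16 and shifts left by 8, giving
// {a<<8, b<<8, c<<8, d<<8}; reinterpreted as v8i8 that is
// {0, a, 0, b, 0, c, 0, d}, i.e. the requested spread with zeros in the
// untouched (undef) lanes.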
5411
5412// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5413// to create an interleaved vector of <[vscale x] n*2 x ty>.
5414// This requires that the size of ty is less than the subtarget's maximum ELEN.
5415static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
5416 const SDLoc &DL, SelectionDAG &DAG,
5417 const RISCVSubtarget &Subtarget) {
5418
5419 // FIXME: Not only does this optimize the code, it fixes some correctness
5420 // issues because MIR does not have freeze.
5421 if (EvenV.isUndef())
5422 return getWideningSpread(OddV, 2, 1, DL, DAG);
5423 if (OddV.isUndef())
5424 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5425
5426 MVT VecVT = EvenV.getSimpleValueType();
5427 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5428 // Convert fixed vectors to scalable if needed
5429 if (VecContainerVT.isFixedLengthVector()) {
5430 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5431 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5432 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5433 }
5434
5435 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5436
5437 // We're working with a vector of the same size as the resulting
5438 // interleaved vector, but with half the number of elements and
5439 // twice the SEW (Hence the restriction on not using the maximum
5440 // ELEN)
5441 MVT WideVT =
5442 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
5443 VecVT.getVectorElementCount());
5444 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5445 if (WideContainerVT.isFixedLengthVector())
5446 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5447
5448 // Bitcast the input vectors to integers in case they are FP
5449 VecContainerVT = VecContainerVT.changeTypeToInteger();
5450 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5451 OddV = DAG.getBitcast(VecContainerVT, OddV);
5452
5453 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5454 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5455
5456 SDValue Interleaved;
5457 if (Subtarget.hasStdExtZvbb()) {
5458 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5459 SDValue OffsetVec =
5460 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5461 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5462 OffsetVec, Passthru, Mask, VL);
5463 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5464 Interleaved, EvenV, Passthru, Mask, VL);
5465 } else {
5466 // FIXME: We should freeze the odd vector here. We already handled the case
5467 // of provably undef/poison above.
5468
5469 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5470 // vwaddu.vv
5471 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5472 OddV, Passthru, Mask, VL);
5473
5474 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all-ones
5475 SDValue AllOnesVec = DAG.getSplatVector(
5476 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5477 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5478 OddV, AllOnesVec, Passthru, Mask, VL);
5479
5480 // Add the two together so we get
5481 // (OddV * 0xff...ff) + (OddV + EvenV)
5482 // = (OddV * 0x100...00) + EvenV
5483 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5484 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
5485 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5486 Interleaved, OddsMul, Passthru, Mask, VL);
5487 }
5488
5489 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5490 MVT ResultContainerVT = MVT::getVectorVT(
5491 VecVT.getVectorElementType(), // Make sure to use original type
5492 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5493 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5494
5495 // Convert back to a fixed vector if needed
5496 MVT ResultVT =
5497 MVT::getVectorVT(VecVT.getVectorElementType(),
5498 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5499 if (ResultVT.isFixedLengthVector())
5500 Interleaved =
5501 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5502
5503 return Interleaved;
5504}
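// Arithmetic sanity check for the non-Zvbb path above (illustrative only),
// with SEW = 8 and one even/odd lane pair EvenV = 0x34, OddV = 0x12:
//   vwaddu.vv : 0x12 + 0x34     = 0x0046
//   vwmulu    : 0x12 * 0xFF     = 0x11EE
//   add       : 0x11EE + 0x0046 = 0x1234 = (OddV << 8) | EvenV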
5505
5506// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5507// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5508static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5509 SelectionDAG &DAG,
5510 const RISCVSubtarget &Subtarget) {
5511 SDLoc DL(SVN);
5512 MVT VT = SVN->getSimpleValueType(0);
5513 SDValue V = SVN->getOperand(0);
5514 unsigned NumElts = VT.getVectorNumElements();
5515
5516 assert(VT.getVectorElementType() == MVT::i1);
5517
5518 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5519 SVN->getMask().size()) ||
5520 !SVN->getOperand(1).isUndef())
5521 return SDValue();
5522
5523 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5524 EVT ViaVT = EVT::getVectorVT(
5525 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5526 EVT ViaBitVT =
5527 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5528
5529 // If we don't have zvbb, or the larger element type is wider than ELEN, the operation will
5530 // be illegal.
5531 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5532 ViaVT) ||
5533 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5534 return SDValue();
5535
5536 // If the bit vector doesn't fit exactly into the larger element type, we need
5537 // to insert it into the larger vector and then shift up the reversed bits
5538 // afterwards to get rid of the gap introduced.
5539 if (ViaEltSize > NumElts)
5540 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5541
5542 SDValue Res =
5543 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5544
5545 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5546 // element type.
5547 if (ViaEltSize > NumElts)
5548 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5549 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5550
5551 Res = DAG.getBitcast(ViaBitVT, Res);
5552
5553 if (ViaEltSize > NumElts)
5554 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5555 return Res;
5556}
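// Worked example (illustrative only): reversing a v4i1 mask vector first
// widens it to v8i1, bitcasts to v1i8 and applies BITREVERSE, which leaves
// the four live bits in positions 7..4; the SRL by ViaEltSize - NumElts = 4
// shifts them back down to positions 3..0 in reversed order before the final
// bitcast and extract.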
5557
5558static bool isLegalBitRotate(ArrayRef<int> Mask, EVT VT,
5559 const RISCVSubtarget &Subtarget,
5560 MVT &RotateVT, unsigned &RotateAmt) {
5561 unsigned NumElts = VT.getVectorNumElements();
5562 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5563 unsigned NumSubElts;
5564 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5565 NumElts, NumSubElts, RotateAmt))
5566 return false;
5567 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5568 NumElts / NumSubElts);
5569
5570 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5571 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5572}
5573
5574// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5575// reinterpret it as a v2i32 and rotate it left by 8 instead. We can lower this
5576// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5577static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5578 SelectionDAG &DAG,
5579 const RISCVSubtarget &Subtarget) {
5580 SDLoc DL(SVN);
5581
5582 EVT VT = SVN->getValueType(0);
5583 unsigned RotateAmt;
5584 MVT RotateVT;
5585 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5586 return SDValue();
5587
5588 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5589
5590 SDValue Rotate;
5591 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5592 // so canonicalize to vrev8.
5593 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5594 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5595 else
5596 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5597 DAG.getConstant(RotateAmt, DL, RotateVT));
5598
5599 return DAG.getBitcast(VT, Rotate);
5600}
5601
5602// If compiling with an exactly known VLEN, see if we can split a
5603// shuffle on m2 or larger into a small number of m1 sized shuffles
5604// which write each destination register exactly once.
5605static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5606 SelectionDAG &DAG,
5607 const RISCVSubtarget &Subtarget) {
5608 SDLoc DL(SVN);
5609 MVT VT = SVN->getSimpleValueType(0);
5610 SDValue V1 = SVN->getOperand(0);
5611 SDValue V2 = SVN->getOperand(1);
5612 ArrayRef<int> Mask = SVN->getMask();
5613
5614 // If we don't know exact data layout, not much we can do. If this
5615 // is already m1 or smaller, no point in splitting further.
5616 const auto VLen = Subtarget.getRealVLen();
5617 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5618 return SDValue();
5619
5620 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5621 // expansion for.
5622 unsigned RotateAmt;
5623 MVT RotateVT;
5624 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5625 return SDValue();
5626
5627 MVT ElemVT = VT.getVectorElementType();
5628 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5629
5630 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5631 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5632 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5633 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5634 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5635 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5636 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5637 unsigned NumOfDestRegs = NumElts / NumOpElts;
5638 // The following semantically builds up a fixed length concat_vector
5639 // of the component shuffle_vectors. We eagerly lower to scalable here
5640 // to avoid DAG combining it back to a large shuffle_vector again.
5641 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5642 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5643 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5644 Operands;
5645 processShuffleMasks(
5646 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5647 [&]() { Operands.emplace_back(); },
5648 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5649 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5650 SmallVector<int>(SrcSubMask));
5651 },
5652 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5653 if (NewReg)
5654 Operands.emplace_back();
5655 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5656 });
5657 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5658 // Note: check that we do not emit too many shuffles here to prevent code
5659 // size explosion.
5660 // TODO: investigate whether it can be improved by extra analysis of the masks to
5661 // check if the code is more profitable.
5662 unsigned NumShuffles = std::accumulate(
5663 Operands.begin(), Operands.end(), 0u,
5664 [&](unsigned N,
5665 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5666 if (Data.empty())
5667 return N;
5668 N += Data.size();
5669 for (const auto &P : Data) {
5670 unsigned Idx2 = std::get<1>(P);
5671 ArrayRef<int> Mask = std::get<2>(P);
5672 if (Idx2 != UINT_MAX)
5673 ++N;
5674 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5675 --N;
5676 }
5677 return N;
5678 });
5679 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5680 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5681 return SDValue();
5682 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5683 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
5684 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5685 return SubVec;
5686 };
5687 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5688 ArrayRef<int> Mask) {
5689 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5690 return SubVec;
5691 };
5692 SDValue Vec = DAG.getUNDEF(ContainerVT);
5693 for (auto [I, Data] : enumerate(Operands)) {
5694 if (Data.empty())
5695 continue;
5696 SmallDenseMap<unsigned, SDValue, 4> Values;
5697 for (unsigned I : seq<unsigned>(Data.size())) {
5698 const auto &[Idx1, Idx2, _] = Data[I];
5699 // If the shuffle contains a permutation of an odd number of elements,
5700 // Idx1 might already be used in the first iteration.
5701 //
5702 // Idx1 = shuffle Idx1, Idx2
5703 // Idx1 = shuffle Idx1, Idx3
5704 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
5705 if (!V)
5706 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5707 (Idx1 % NumOfSrcRegs) * NumOpElts);
5708 if (Idx2 != UINT_MAX) {
5709 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
5710 if (!V)
5711 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5712 (Idx2 % NumOfSrcRegs) * NumOpElts);
5713 }
5714 }
5715 SDValue V;
5716 for (const auto &[Idx1, Idx2, Mask] : Data) {
5717 SDValue V1 = Values.at(Idx1);
5718 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5719 V = PerformShuffle(V1, V2, Mask);
5720 Values[Idx1] = V;
5721 }
5722
5723 unsigned InsertIdx = I * NumOpElts;
5724 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5725 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
5726 }
5727 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5728}
5729
5730// Matches a subset of compress masks with a contiguous prefix of output
5731// elements. This could be extended to allow gaps by deciding which
5732// source elements to spuriously demand.
5733static bool isCompressMask(ArrayRef<int> Mask) {
5734 int Last = -1;
5735 bool SawUndef = false;
5736 for (const auto &[Idx, M] : enumerate(Mask)) {
5737 if (M == -1) {
5738 SawUndef = true;
5739 continue;
5740 }
5741 if (SawUndef)
5742 return false;
5743 if (Idx > (unsigned)M)
5744 return false;
5745 if (M <= Last)
5746 return false;
5747 Last = M;
5748 }
5749 return true;
5750}
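// Worked example (illustrative only): the v4 mask <0, 2, 3, -1> is accepted
// (indices strictly increase, each index is >= its position, and the only
// undefs form a trailing run), selecting source elements 0, 2 and 3 for a
// vcompress. A mask like <2, 0, 1, 3> is rejected because the indices are
// not increasing.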
5751
5752/// Given a shuffle where the indices are disjoint between the two sources,
5753/// e.g.:
5754///
5755/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5756///
5757/// Merge the two sources into one and do a single source shuffle:
5758///
5759/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5760/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5761///
5762/// A vselect will either be merged into a masked instruction or be lowered as a
5763/// vmerge.vvm, which is cheaper than a vrgather.vv.
5764static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5765 SelectionDAG &DAG,
5766 const RISCVSubtarget &Subtarget) {
5767 MVT VT = SVN->getSimpleValueType(0);
5768 MVT XLenVT = Subtarget.getXLenVT();
5769 SDLoc DL(SVN);
5770
5771 const ArrayRef<int> Mask = SVN->getMask();
5772
5773 // Work out which source each lane will come from.
5774 SmallVector<int, 16> Srcs(Mask.size(), -1);
5775
5776 for (int Idx : Mask) {
5777 if (Idx == -1)
5778 continue;
5779 unsigned SrcIdx = Idx % Mask.size();
5780 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5781 if (Srcs[SrcIdx] == -1)
5782 // Mark this source as using this lane.
5783 Srcs[SrcIdx] = Src;
5784 else if (Srcs[SrcIdx] != Src)
5785 // The other source is using this lane: not disjoint.
5786 return SDValue();
5787 }
5788
5789 SmallVector<SDValue> SelectMaskVals;
5790 for (int Lane : Srcs) {
5791 if (Lane == -1)
5792 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5793 else
5794 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5795 }
5796 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5797 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5798 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5799 SVN->getOperand(0), SVN->getOperand(1));
5800
5801 // Move all indices relative to the first source.
5802 SmallVector<int> NewMask(Mask.size());
5803 for (unsigned I = 0; I < Mask.size(); I++) {
5804 if (Mask[I] == -1)
5805 NewMask[I] = -1;
5806 else
5807 NewMask[I] = Mask[I] % Mask.size();
5808 }
5809
5810 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5811}
5812
5813/// Is this mask local (i.e. elements only move within their local span), and
5814/// repeating (that is, the same rearrangement is being done within each span)?
5815static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5816 // Require a prefix from the original mask until the consumer code
5817 // is adjusted to rewrite the mask instead of just taking a prefix.
5818 for (auto [I, M] : enumerate(Mask)) {
5819 if (M == -1)
5820 continue;
5821 if ((M / Span) != (int)(I / Span))
5822 return false;
5823 int SpanIdx = I % Span;
5824 int Expected = M % Span;
5825 if (Mask[SpanIdx] != Expected)
5826 return false;
5827 }
5828 return true;
5829}
5830
5831/// Is this mask only using elements from the first span of the input?
5832static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5833 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5834}
5835
5836/// Return true for a mask which performs an arbitrary shuffle within the first
5837/// span, and then repeats that same result across all remaining spans. Note
5838/// that this doesn't check if all the inputs come from a single span!
5839static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5840 // Require a prefix from the original mask until the consumer code
5841 // is adjusted to rewrite the mask instead of just taking a prefix.
5842 for (auto [I, M] : enumerate(Mask)) {
5843 if (M == -1)
5844 continue;
5845 int SpanIdx = I % Span;
5846 if (Mask[SpanIdx] != M)
5847 return false;
5848 }
5849 return true;
5850}
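// Worked examples for the span helpers above (illustrative only), with
// Span = 4 on a v8 mask:
//   <1, 0, 3, 2, 5, 4, 7, 6> is a local repeating shuffle: every element
//   stays in its own span and each span applies the same <1, 0, 3, 2> swap.
//   <2, 0, 1, 3, 2, 0, 1, 3> is a span splat: the second span repeats the
//   exact result of the first; it also only uses source elements 0..3, so it
//   is a low-source shuffle as well.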
5851
5852/// Try to widen element type to get a new mask value for a better permutation
5853/// sequence. This doesn't try to inspect the widened mask for profitability;
5854/// we speculate the widened form is equal or better. This has the effect of
5855/// reducing mask constant sizes - allowing cheaper materialization sequences
5856/// - and index sequence sizes - reducing register pressure and materialization
5857/// cost, at the cost of (possibly) an extra VTYPE toggle.
5858static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5859 SDLoc DL(Op);
5860 MVT VT = Op.getSimpleValueType();
5861 MVT ScalarVT = VT.getVectorElementType();
5862 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5863 SDValue V0 = Op.getOperand(0);
5864 SDValue V1 = Op.getOperand(1);
5865 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5866
5867 // Avoid wasted work leading to isTypeLegal check failing below
5868 if (ElementSize > 32)
5869 return SDValue();
5870
5871 SmallVector<int, 8> NewMask;
5872 if (!widenShuffleMaskElts(Mask, NewMask))
5873 return SDValue();
5874
5875 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5876 : MVT::getIntegerVT(ElementSize * 2);
5877 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5878 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5879 return SDValue();
5880 V0 = DAG.getBitcast(NewVT, V0);
5881 V1 = DAG.getBitcast(NewVT, V1);
5882 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5883}
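// Worked example (illustrative only): a v4i32 shuffle with mask <2, 3, 0, 1>
// widens to a v2i64 shuffle with mask <1, 0>, since each adjacent pair of
// i32 lanes moves together; the shorter mask and index sequence are cheaper
// to materialize, as noted above.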
5884
5885static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5886 const RISCVSubtarget &Subtarget) {
5887 SDValue V1 = Op.getOperand(0);
5888 SDValue V2 = Op.getOperand(1);
5889 SDLoc DL(Op);
5890 MVT XLenVT = Subtarget.getXLenVT();
5891 MVT VT = Op.getSimpleValueType();
5892 unsigned NumElts = VT.getVectorNumElements();
5893 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5894
5895 if (VT.getVectorElementType() == MVT::i1) {
5896 // Lower to a vror.vi of a larger element type if possible before we promote
5897 // i1s to i8s.
5898 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5899 return V;
5900 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5901 return V;
5902
5903 // Promote i1 shuffle to i8 shuffle.
5904 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5905 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5906 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5907 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5908 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5909 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5910 ISD::SETNE);
5911 }
5912
5913 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5914
5915 // Store the return value in a single variable instead of structured bindings
5916 // so that we can pass it to GetSlide below, which cannot capture structured
5917 // bindings until C++20.
5918 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5919 auto [TrueMask, VL] = TrueMaskVL;
5920
5921 if (SVN->isSplat()) {
5922 const int Lane = SVN->getSplatIndex();
5923 if (Lane >= 0) {
5924 MVT SVT = VT.getVectorElementType();
5925
5926 // Turn splatted vector load into a strided load with an X0 stride.
5927 SDValue V = V1;
5928 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5929 // with undef.
5930 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5931 int Offset = Lane;
5932 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5933 int OpElements =
5934 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5935 V = V.getOperand(Offset / OpElements);
5936 Offset %= OpElements;
5937 }
5938
5939 // We need to ensure the load isn't atomic or volatile.
5940 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5941 auto *Ld = cast<LoadSDNode>(V);
5942 Offset *= SVT.getStoreSize();
5943 SDValue NewAddr = DAG.getMemBasePlusOffset(
5944 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5945
5946 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5947 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5948 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5949 SDValue IntID =
5950 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5951 SDValue Ops[] = {Ld->getChain(),
5952 IntID,
5953 DAG.getUNDEF(ContainerVT),
5954 NewAddr,
5955 DAG.getRegister(RISCV::X0, XLenVT),
5956 VL};
5957 SDValue NewLoad = DAG.getMemIntrinsicNode(
5958 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5959 DAG.getMachineFunction().getMachineMemOperand(
5960 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5961 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5962 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5963 }
5964
5965 MVT SplatVT = ContainerVT;
5966
5967 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5968 if (SVT == MVT::bf16 ||
5969 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5970 SVT = MVT::i16;
5971 SplatVT = ContainerVT.changeVectorElementType(SVT);
5972 }
5973
5974 // Otherwise use a scalar load and splat. This will give the best
5975 // opportunity to fold a splat into the operation. ISel can turn it into
5976 // the x0 strided load if we aren't able to fold away the select.
5977 if (SVT.isFloatingPoint())
5978 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5979 Ld->getPointerInfo().getWithOffset(Offset),
5980 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
5981 else
5982 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5983 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5984 Ld->getBaseAlign(),
5985 Ld->getMemOperand()->getFlags());
5987
5988 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5989 : RISCVISD::VMV_V_X_VL;
5990 SDValue Splat =
5991 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5992 Splat = DAG.getBitcast(ContainerVT, Splat);
5993 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5994 }
5995
5996 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5997 assert(Lane < (int)NumElts && "Unexpected lane!");
5998 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5999 V1, DAG.getConstant(Lane, DL, XLenVT),
6000 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6001 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6002 }
6003 }
6004
6005 // For exact VLEN m2 or greater, try to split to m1 operations if we
6006 // can split cleanly.
6007 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
6008 return V;
6009
6010 ArrayRef<int> Mask = SVN->getMask();
6011
6012 if (SDValue V =
6013 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
6014 return V;
6015
6016 if (SDValue V =
6017 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
6018 return V;
6019
6020 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
6021 // available.
6022 if (Subtarget.hasStdExtZvkb())
6023 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6024 return V;
6025
6026 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
6027 NumElts != 2)
6028 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
6029
6030 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
6031 // use shift and truncate to perform the shuffle.
6032 // TODO: For Factor=6, we can perform the first step of the deinterleave via
6033 // shift-and-trunc reducing total cost for everything except an mf8 result.
6034 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
6035 // to do the entire operation.
6036 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6037 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6038 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6039 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
6040 unsigned Index = 0;
6041 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
6042 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6043 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
6044 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
6045 if (1 < count_if(Mask,
6046 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6047 1 < count_if(Mask, [&Mask](int Idx) {
6048 return Idx >= (int)Mask.size();
6049 })) {
6050 // Narrow each source and concatenate them.
6051 // FIXME: For small LMUL it is better to concatenate first.
6052 MVT EltVT = VT.getVectorElementType();
6053 auto EltCnt = VT.getVectorElementCount();
6054 MVT SubVT =
6055 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
6056
6057 SDValue Lo =
6058 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
6059 SDValue Hi =
6060 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
6061
6062 SDValue Concat =
6065 if (Factor == 2)
6066 return Concat;
6067
6068 SDValue Vec = DAG.getUNDEF(VT);
6069 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
6070 }
6071 }
6072 }
6073 }
6074
6075 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
6076 // e64 which can't match above.
6077 unsigned Index = 0;
6078 if (Subtarget.hasVendorXRivosVizip() &&
6079 ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
6080 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6081 unsigned Opc =
6082 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
6083 if (V2.isUndef())
6084 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6085 if (auto VLEN = Subtarget.getRealVLen();
6086 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
6087 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6088 if (SDValue Src = foldConcatVector(V1, V2)) {
6089 EVT NewVT = VT.getDoubleNumVectorElementsVT();
6090 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
6091 SDValue Res =
6092 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
6093 return DAG.getExtractSubvector(DL, VT, Res, 0);
6094 }
6095 // Deinterleave each source and concatenate them, or concat first, then
6096 // deinterleave.
6097 if (1 < count_if(Mask,
6098 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6099 1 < count_if(Mask,
6100 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
6101
6102 const unsigned EltSize = VT.getScalarSizeInBits();
6103 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6104 if (NumElts < MinVLMAX) {
6105 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6106 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6107 SDValue Res =
6108 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6109 return DAG.getExtractSubvector(DL, VT, Res, 0);
6110 }
6111
6112 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6113 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6114
6115 MVT SubVT = VT.getHalfNumVectorElementsVT();
6116 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6117 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6118 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6119 }
6120 }
6121
6122 if (SDValue V =
6123 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6124 return V;
6125
6126 // Detect an interleave shuffle and lower to
6127 // (vwmaccu.vx (vwaddu.vv lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6128 int EvenSrc, OddSrc;
6129 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6130 !(NumElts == 2 &&
6131 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6132 // Extract the halves of the vectors.
6133 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6134
6135 // Recognize if one half is actually undef; the matching above will
6136 // otherwise reuse the even stream for the undef one. This improves
6137 // spread(2) shuffles.
6138 bool LaneIsUndef[2] = { true, true};
6139 for (const auto &[Idx, M] : enumerate(Mask))
6140 LaneIsUndef[Idx % 2] &= (M == -1);
6141
6142 int Size = Mask.size();
6143 SDValue EvenV, OddV;
6144 if (LaneIsUndef[0]) {
6145 EvenV = DAG.getUNDEF(HalfVT);
6146 } else {
6147 assert(EvenSrc >= 0 && "Undef source?");
6148 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6149 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6150 }
6151
6152 if (LaneIsUndef[1]) {
6153 OddV = DAG.getUNDEF(HalfVT);
6154 } else {
6155 assert(OddSrc >= 0 && "Undef source?");
6156 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6157 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6158 }
6159
6160 // Prefer vzip2a if available.
6161 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6162 if (Subtarget.hasVendorXRivosVizip()) {
6163 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6164 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6165 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6166 }
6167 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6168 }
6169
6170 // Recognize a pattern which can be handled via a pair of vslideup/vslidedown
6171 // instructions (in any combination) with masking on the second instruction.
6172 // Also handles masked slides into an identity source, and single slides
6173 // without masking. Avoid matching bit rotates (which are not also element
6174 // rotates) as slide pairs. This is a performance heuristic, not a
6175 // functional check.
6176 std::array<std::pair<int, int>, 2> SrcInfo;
6177 unsigned RotateAmt;
6178 MVT RotateVT;
6179 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6180 (isElementRotate(SrcInfo, NumElts) ||
6181 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6182 SDValue Sources[2];
6183 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6184 int SrcIdx = Info.first;
6185 assert(SrcIdx == 0 || SrcIdx == 1);
6186 SDValue &Src = Sources[SrcIdx];
6187 if (!Src) {
6188 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6189 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6190 }
6191 return Src;
6192 };
6193 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6194 SDValue Passthru) {
6195 auto [TrueMask, VL] = TrueMaskVL;
6196 SDValue SrcV = GetSourceFor(Src);
6197 int SlideAmt = Src.second;
6198 if (SlideAmt == 0) {
6199 // Should never be second operation
6200 assert(Mask == TrueMask);
6201 return SrcV;
6202 }
6203 if (SlideAmt < 0)
6204 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6205 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6206 RISCVVType::TAIL_AGNOSTIC);
6207 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6208 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6209 RISCVVType::TAIL_AGNOSTIC);
6210 };
6211
6212 if (SrcInfo[1].first == -1) {
6213 SDValue Res = DAG.getUNDEF(ContainerVT);
6214 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6215 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6216 }
6217
6218 if (Subtarget.hasVendorXRivosVizip()) {
6219 bool TryWiden = false;
6220 unsigned Factor;
6221 if (isZipEven(SrcInfo, Mask, Factor)) {
6222 if (Factor == 1) {
6223 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6224 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6225 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6226 Subtarget);
6227 }
6228 TryWiden = true;
6229 }
6230 if (isZipOdd(SrcInfo, Mask, Factor)) {
6231 if (Factor == 1) {
6232 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6233 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6234 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6235 Subtarget);
6236 }
6237 TryWiden = true;
6238 }
6239 // If we found a widening opportunity which would let us form a
6240 // zipeven or zipodd, use the generic code to widen the shuffle
6241 // and recurse through this logic.
6242 if (TryWiden)
6243 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6244 return V;
6245 }
6246
6247 // Build the mask. Note that vslideup unconditionally preserves elements
6248 // below the slide amount in the destination, and thus those elements are
6249 // undefined in the mask. If the mask ends up all true (or undef), it
6250 // will be folded away by general logic.
6251 SmallVector<SDValue> MaskVals;
6252 for (const auto &[Idx, M] : enumerate(Mask)) {
6253 if (M < 0 ||
6254 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6255 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6256 continue;
6257 }
6258 int Src = M >= (int)NumElts;
6259 int Diff = (int)Idx - (M % NumElts);
6260 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6261 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6262 "Must match exactly one of the two slides");
6263 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6264 }
6265 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6266 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6267 SDValue SelectMask = convertToScalableVector(
6268 ContainerVT.changeVectorElementType(MVT::i1),
6269 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6270
6271 SDValue Res = DAG.getUNDEF(ContainerVT);
6272 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6273 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6274 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6275 }
6276
6277 // Handle any remaining single source shuffles
6278 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6279 if (V2.isUndef()) {
6280 // We might be able to express the shuffle as a bitrotate. But even if we
6281 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6282 // shifts and a vor will have a higher throughput than a vrgather.
6283 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6284 return V;
6285
6286 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6287 return V;
6288
6289 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6290 // is fully covered in interleave(2) above, so it is ignored here.
6291 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6292 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6293 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6294 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6295 unsigned Index;
6296 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6297 MVT NarrowVT =
6298 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6299 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6300 return getWideningSpread(Src, Factor, Index, DL, DAG);
6301 }
6302 }
6303 }
6304
6305 // If only a prefix of the source elements influences a prefix of the
6306 // destination elements, try to see if we can reduce the required LMUL
6307 unsigned MinVLen = Subtarget.getRealMinVLen();
6308 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6309 if (NumElts > MinVLMAX) {
6310 unsigned MaxIdx = 0;
6311 for (auto [I, M] : enumerate(Mask)) {
6312 if (M == -1)
6313 continue;
6314 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6315 }
6316 unsigned NewNumElts =
6317 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6318 if (NewNumElts != NumElts) {
6319 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6320 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6321 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6322 Mask.take_front(NewNumElts));
6323 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6324 }
6325 }
6326
6327 // Before hitting generic lowering fallbacks, try to widen the mask
6328 // to a wider SEW.
6329 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6330 return V;
6331
6332 // Can we generate a vcompress instead of a vrgather? These scale better
6333 // at high LMUL, at the cost of not being able to fold a following select
6334 // into them. The mask constants are also smaller than the index vector
6335 // constants, and thus easier to materialize.
6336 if (isCompressMask(Mask)) {
6337 SmallVector<SDValue> MaskVals(NumElts,
6338 DAG.getConstant(false, DL, XLenVT));
6339 for (auto Idx : Mask) {
6340 if (Idx == -1)
6341 break;
6342 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6343 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6344 }
6345 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6346 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6347 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6348 DAG.getUNDEF(VT));
6349 }
6350
6351 if (VT.getScalarSizeInBits() == 8 &&
6352 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6353 // On such a vector we're unable to use i8 as the index type.
6354 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6355 // may involve vector splitting if we're already at LMUL=8, or our
6356 // user-supplied maximum fixed-length LMUL.
6357 return SDValue();
6358 }
6359
6360 // Base case for the two operand recursion below - handle the worst case
6361 // single source shuffle.
6362 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6363 MVT IndexVT = VT.changeTypeToInteger();
6364 // Since we can't introduce illegal index types at this stage, use i16 and
6365 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6366 // than XLenVT.
6367 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6368 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6369 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6370 }
6371
6372 // If the mask allows, we can do all the index computation in 16 bits. This
6373 // requires less work and less register pressure at high LMUL, and creates
6374 // smaller constants which may be cheaper to materialize.
6375 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6376 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6377 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6378 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6379 }
6380
6381 MVT IndexContainerVT =
6382 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6383
6384 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6385 SmallVector<SDValue> GatherIndicesLHS;
6386 for (int MaskIndex : Mask) {
6387 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6388 GatherIndicesLHS.push_back(IsLHSIndex
6389 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6390 : DAG.getUNDEF(XLenVT));
6391 }
6392 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6393 LHSIndices =
6394 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6395 // At m1 and less, there's no point trying any of the high LMUL splitting
6396 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6397 if (NumElts <= MinVLMAX) {
6398 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6399 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6400 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6401 }
6402
6403 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6404 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6405 auto [InnerTrueMask, InnerVL] =
6406 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6407 int N =
6408 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6409 assert(isPowerOf2_32(N) && N <= 8);
6410
6411 // If we have a locally repeating mask, then we can reuse the first
6412 // register in the index register group for all registers within the
6413 // source register group. TODO: This generalizes to m2, and m4.
6414 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6415 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6416 SDValue Gather = DAG.getUNDEF(ContainerVT);
6417 for (int i = 0; i < N; i++) {
6418 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6419 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6420 SDValue SubVec =
6421 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6422 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6423 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6424 }
6425 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6426 }
6427
6428 // If we have a shuffle which only uses the first register in our source
6429 // register group, and repeats the same index across all spans, we can
6430 // use a single vrgather (and possibly some register moves).
6431 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6432 // which we can do a linear number of shuffles to form an m1 which
6433 // contains all the output elements.
6434 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6435 isSpanSplatShuffle(Mask, MinVLMAX)) {
6436 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6437 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6438 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6439 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6440 SDValue Gather = DAG.getUNDEF(ContainerVT);
6441 for (int i = 0; i < N; i++)
6442 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6443 M1VT.getVectorMinNumElements() * i);
6444 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6445 }
6446
6447 // If we have a shuffle which only uses the first register in our
6448 // source register group, we can do a linear number of m1 vrgathers
6449 // reusing the same source register (but with different indices)
6450 // TODO: This can be generalized for m2 or m4, or for any shuffle
6451 // for which we can do a vslidedown followed by this expansion.
6452 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6453 SDValue SlideAmt =
6454 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6455 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6456 SDValue Gather = DAG.getUNDEF(ContainerVT);
6457 for (int i = 0; i < N; i++) {
6458 if (i != 0)
6459 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6460 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6461 SlideAmt, TrueMask, VL);
6462 SDValue SubIndex =
6463 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6464 SDValue SubVec =
6465 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6466 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6467 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6468 M1VT.getVectorMinNumElements() * i);
6469 }
6470 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6471 }
6472
6473 // Fallback to generic vrgather if we can't find anything better.
6474 // On many machines, this will be O(LMUL^2)
6475 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6476 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6477 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6478 }
6479
6480 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6481 // merged with a second vrgather.
6482 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6483
6484 // Now construct the mask that will be used by the blended vrgather operation.
6485 // Construct the appropriate indices into each vector.
6486 for (int MaskIndex : Mask) {
6487 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6488 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6489 ? MaskIndex : -1);
6490 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6491 }
6492
6493 // If the mask indices are disjoint between the two sources, we can lower it
6494 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6495 // operands may end up being lowered to something cheaper than a vrgather.vv.
6496 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6497 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6498 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6499 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6500 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6501 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6502 return V;
6503
6504 // Before hitting generic lowering fallbacks, try to widen the mask
6505 // to a wider SEW.
6506 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6507 return V;
6508
6509 // Try to pick a profitable operand order.
6510 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6511 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6512
6513 // Recursively invoke lowering for each operand if we had two
6514 // independent single source shuffles, and then combine the result via a
6515 // vselect. Note that the vselect will likely be folded back into the
6516 // second permute (vrgather, or other) by the post-isel combine.
6517 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6518 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6519
6520 SmallVector<SDValue> MaskVals;
6521 for (int MaskIndex : Mask) {
6522 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6523 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6524 }
6525
6526 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6527 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6528 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6529
6530 if (SwapOps)
6531 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6532 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6533}
6534
6535bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6536 // Only support legal VTs for other shuffles for now.
6537 if (!isTypeLegal(VT))
6538 return false;
6539
6540 // Support splats for any type. These should type legalize well.
6541 if (ShuffleVectorSDNode::isSplatMask(M))
6542 return true;
6543
6544 const unsigned NumElts = M.size();
6545 MVT SVT = VT.getSimpleVT();
6546
6547 // Not for i1 vectors.
6548 if (SVT.getScalarType() == MVT::i1)
6549 return false;
6550
6551 std::array<std::pair<int, int>, 2> SrcInfo;
6552 int Dummy1, Dummy2;
6553 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6554 (::isMaskedSlidePair(M, SrcInfo) &&
6555 isElementRotate(SrcInfo, NumElts)) ||
6556 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6557}
6558
6559// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6560// the exponent.
6561SDValue
6562RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6563 SelectionDAG &DAG) const {
6564 MVT VT = Op.getSimpleValueType();
6565 unsigned EltSize = VT.getScalarSizeInBits();
6566 SDValue Src = Op.getOperand(0);
6567 SDLoc DL(Op);
6568 MVT ContainerVT = VT;
6569
6570 SDValue Mask, VL;
6571 if (Op->isVPOpcode()) {
6572 Mask = Op.getOperand(1);
6573 if (VT.isFixedLengthVector())
6574 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6575 Subtarget);
6576 VL = Op.getOperand(2);
6577 }
6578
6579 // We choose an FP type that can represent the value exactly if possible.
6580 // Otherwise, we use a round-towards-zero conversion to keep the exponent correct.
6581 // TODO: Use f16 for i8 when possible?
6582 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6583 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
6584 FloatEltVT = MVT::f32;
6585 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
6586
6587 // Legal types should have been checked in the RISCVTargetLowering
6588 // constructor.
6589 // TODO: Splitting may make sense in some cases.
6590 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6591 "Expected legal float type!");
6592
6593 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6594 // The trailing zero count is equal to log2 of this single bit value.
6595 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6596 SDValue Neg = DAG.getNegative(Src, DL, VT);
6597 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
6598 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
6599 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
6600 Src, Mask, VL);
6601 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
6602 }
6603
6604 // We have a legal FP type, convert to it.
6605 SDValue FloatVal;
6606 if (FloatVT.bitsGT(VT)) {
6607 if (Op->isVPOpcode())
6608 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
6609 else
6610 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
6611 } else {
6612 // Use RTZ to avoid rounding influencing exponent of FloatVal.
6613 if (VT.isFixedLengthVector()) {
6614 ContainerVT = getContainerForFixedLengthVector(VT);
6615 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6616 }
6617 if (!Op->isVPOpcode())
6618 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6619 SDValue RTZRM =
6620 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
6621 MVT ContainerFloatVT =
6622 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
6623 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
6624 Src, Mask, RTZRM, VL);
6625 if (VT.isFixedLengthVector())
6626 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
6627 }
6628 // Bitcast to integer and shift the exponent to the LSB.
6629 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6630 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
6631 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
6632
6633 SDValue Exp;
6634 // Restore back to original type. Truncation after SRL is to generate vnsrl.
6635 if (Op->isVPOpcode()) {
6636 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
6637 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
6638 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
6639 } else {
6640 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
6641 DAG.getConstant(ShiftAmt, DL, IntVT));
6642 if (IntVT.bitsLT(VT))
6643 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
6644 else if (IntVT.bitsGT(VT))
6645 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
6646 }
6647
6648 // The exponent contains log2 of the value in biased form.
6649 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6650 // For trailing zeros, we just need to subtract the bias.
6651 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6652 return DAG.getNode(ISD::SUB, DL, VT, Exp,
6653 DAG.getConstant(ExponentBias, DL, VT));
6654 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6655 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
6656 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
6657
6658 // For leading zeros, we need to remove the bias and convert from log2 to
6659 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
6660 unsigned Adjust = ExponentBias + (EltSize - 1);
6661 SDValue Res;
6662 if (Op->isVPOpcode())
6663 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6664 Mask, VL);
6665 else
6666 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6667
6668 // With a zero input, the result above equals Adjust, which is greater than
6669 // EltSize. Hence, we can clamp the result with min(Res, EltSize) for CTLZ.
6670 if (Op.getOpcode() == ISD::CTLZ)
6671 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6672 else if (Op.getOpcode() == ISD::VP_CTLZ)
6673 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6674 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6675 return Res;
6676}
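// Illustrative walk-through for the lowering above (not from the original
// source): for an i32 element x = 8 (only bit 3 set), the CTTZ_ZERO_UNDEF
// path computes x & -x = 8, converts it to f32 (biased exponent
// 127 + 3 = 130), shifts the bitcast integer right by 23 to extract 130, and
// subtracts the bias 127 to get cttz(8) = 3. For CTLZ it instead computes
// Adjust - Exp = (127 + 31) - 130 = 28 = ctlz(8).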
6677
6678SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6679 SelectionDAG &DAG) const {
6680 SDLoc DL(Op);
6681 MVT XLenVT = Subtarget.getXLenVT();
6682 SDValue Source = Op->getOperand(0);
6683 MVT SrcVT = Source.getSimpleValueType();
6684 SDValue Mask = Op->getOperand(1);
6685 SDValue EVL = Op->getOperand(2);
6686
6687 if (SrcVT.isFixedLengthVector()) {
6688 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6689 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6690 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6691 Subtarget);
6692 SrcVT = ContainerVT;
6693 }
6694
6695 // Convert to boolean vector.
6696 if (SrcVT.getScalarType() != MVT::i1) {
6697 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6698 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6699 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6700 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6701 DAG.getUNDEF(SrcVT), Mask, EVL});
6702 }
6703
6704 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6705 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6706 // In this case, we can interpret poison as -1, so there is nothing further to do.
6707 return Res;
6708
6709 // Convert -1 to VL.
6710 SDValue SetCC =
6711 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6712 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6713 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6714}
6715
6716// While RVV has alignment restrictions, we should always be able to load as a
6717// legal equivalently-sized byte-typed vector instead. This method is
6718 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
6719// the load is already correctly-aligned, it returns SDValue().
6720SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6721 SelectionDAG &DAG) const {
6722 auto *Load = cast<LoadSDNode>(Op);
6723 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6724
6725 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6726 Load->getMemoryVT(),
6727 *Load->getMemOperand()))
6728 return SDValue();
6729
6730 SDLoc DL(Op);
6731 MVT VT = Op.getSimpleValueType();
6732 unsigned EltSizeBits = VT.getScalarSizeInBits();
6733 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6734 "Unexpected unaligned RVV load type");
6735 MVT NewVT =
6736 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6737 assert(NewVT.isValid() &&
6738 "Expecting equally-sized RVV vector types to be legal");
6739 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6740 Load->getPointerInfo(), Load->getBaseAlign(),
6741 Load->getMemOperand()->getFlags());
6742 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6743}
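// Illustrative example for the expansion above (assumed types): an
// under-aligned load of <vscale x 2 x i32> is re-expressed as a load of
// <vscale x 8 x i8> from the same address and bitcast back, since byte-typed
// RVV loads carry no alignment requirement beyond a single byte.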
6744
6745// While RVV has alignment restrictions, we should always be able to store as a
6746// legal equivalently-sized byte-typed vector instead. This method is
6747 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
6748// returns SDValue() if the store is already correctly aligned.
6749SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6750 SelectionDAG &DAG) const {
6751 auto *Store = cast<StoreSDNode>(Op);
6752 assert(Store && Store->getValue().getValueType().isVector() &&
6753 "Expected vector store");
6754
6755 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6756 Store->getMemoryVT(),
6757 *Store->getMemOperand()))
6758 return SDValue();
6759
6760 SDLoc DL(Op);
6761 SDValue StoredVal = Store->getValue();
6762 MVT VT = StoredVal.getSimpleValueType();
6763 unsigned EltSizeBits = VT.getScalarSizeInBits();
6764 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6765 "Unexpected unaligned RVV store type");
6766 MVT NewVT =
6767 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6768 assert(NewVT.isValid() &&
6769 "Expecting equally-sized RVV vector types to be legal");
6770 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6771 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6772 Store->getPointerInfo(), Store->getBaseAlign(),
6773 Store->getMemOperand()->getFlags());
6774}
6775
6776static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
6777 const RISCVSubtarget &Subtarget) {
6778 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
6779
6780 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
6781
6782 // All simm32 constants should be handled by isel.
6783 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
6784 // this check redundant, but small immediates are common so this check
6785 // should have better compile time.
6786 if (isInt<32>(Imm))
6787 return Op;
6788
6789 // We only need to cost the immediate, if constant pool lowering is enabled.
6790 if (!Subtarget.useConstantPoolForLargeInts())
6791 return Op;
6792
6793 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
6794 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6795 return Op;
6796
6797 // Optimizations below are disabled for opt size. If we're optimizing for
6798 // size, use a constant pool.
6799 if (DAG.shouldOptForSize())
6800 return SDValue();
6801
6802 // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
6803 // do that if it will avoid a constant pool.
6804 // It will require an extra temporary register though.
6805 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6806 // low and high 32 bits are the same and bit 31 and 63 are set.
6807 unsigned ShiftAmt, AddOpc;
6808 RISCVMatInt::InstSeq SeqLo =
6809 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6810 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6811 return Op;
6812
6813 return SDValue();
6814}
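// Illustrative example for the two-register special case above (assumed
// immediate): Imm = 0xdeadbeefdeadbeef has identical low/high halves with
// bits 31 and 63 set, so with Zba it can be built roughly as
//   X = <materialize 0xdeadbeef>; slli t, X, 32; add.uw res, X, t
// instead of loading it from a constant pool.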
6815
6816SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6817 SelectionDAG &DAG) const {
6818 MVT VT = Op.getSimpleValueType();
6819 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6820
6821 // Can this constant be selected by a Zfa FLI instruction?
6822 bool Negate = false;
6823 int Index = getLegalZfaFPImm(Imm, VT);
6824
6825 // If the constant is negative, try negating.
6826 if (Index < 0 && Imm.isNegative()) {
6827 Index = getLegalZfaFPImm(-Imm, VT);
6828 Negate = true;
6829 }
6830
6831 // If we couldn't find a FLI lowering, fall back to generic code.
6832 if (Index < 0)
6833 return SDValue();
6834
6835 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6836 SDLoc DL(Op);
6837 SDValue Const =
6838 DAG.getNode(RISCVISD::FLI, DL, VT,
6839 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6840 if (!Negate)
6841 return Const;
6842
6843 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6844}
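// Illustrative example for the FLI lowering above (assuming 2.0 is a legal
// Zfa FLI immediate, as in the Zfa immediate table): the f32 constant -2.0
// has no direct encoding, but its negation 2.0 does, so this emits
// RISCVISD::FLI for 2.0 followed by an FNEG, i.e. fli.s + fneg.s.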
6845
6846static SDValue LowerPREFETCH(SDValue Op, const RISCVSubtarget &Subtarget,
6847 SelectionDAG &DAG) {
6848
6849 unsigned IsData = Op.getConstantOperandVal(4);
6850
6851 // On the MIPS P8700 we only support data prefetch for now.
6852 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
6853 return Op.getOperand(0);
6854 return Op;
6855}
6856
6857static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6858 const RISCVSubtarget &Subtarget) {
6859 SDLoc dl(Op);
6860 AtomicOrdering FenceOrdering =
6861 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6862 SyncScope::ID FenceSSID =
6863 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6864
6865 if (Subtarget.hasStdExtZtso()) {
6866 // The only fence that needs an instruction is a sequentially-consistent
6867 // cross-thread fence.
6868 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6869 FenceSSID == SyncScope::System)
6870 return Op;
6871
6872 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6873 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6874 }
6875
6876 // singlethread fences only synchronize with signal handlers on the same
6877 // thread and thus only need to preserve instruction order, not actually
6878 // enforce memory ordering.
6879 if (FenceSSID == SyncScope::SingleThread)
6880 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6881 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6882
6883 return Op;
6884}
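// Illustrative consequence of the lowering above: with Ztso, only a
// sequentially-consistent cross-thread fence still emits a real fence
// instruction; other atomic fences, and any singlethread fence, become
// ISD::MEMBARRIER and codegen to nothing.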
6885
6886SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6887 SelectionDAG &DAG) const {
6888 SDLoc DL(Op);
6889 MVT VT = Op.getSimpleValueType();
6890 MVT XLenVT = Subtarget.getXLenVT();
6891 unsigned Check = Op.getConstantOperandVal(1);
6892 unsigned TDCMask = 0;
6893 if (Check & fcSNan)
6894 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6895 if (Check & fcQNan)
6896 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6897 if (Check & fcPosInf)
6898 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6899 if (Check & fcNegInf)
6900 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6901 if (Check & fcPosNormal)
6902 TDCMask |= RISCV::FPMASK_Positive_Normal;
6903 if (Check & fcNegNormal)
6904 TDCMask |= RISCV::FPMASK_Negative_Normal;
6905 if (Check & fcPosSubnormal)
6906 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6907 if (Check & fcNegSubnormal)
6908 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6909 if (Check & fcPosZero)
6910 TDCMask |= RISCV::FPMASK_Positive_Zero;
6911 if (Check & fcNegZero)
6912 TDCMask |= RISCV::FPMASK_Negative_Zero;
6913
6914 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6915
6916 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6917
6918 if (VT.isVector()) {
6919 SDValue Op0 = Op.getOperand(0);
6920 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6921
6922 if (VT.isScalableVector()) {
6923 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6924 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6925 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6926 Mask = Op.getOperand(2);
6927 VL = Op.getOperand(3);
6928 }
6929 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6930 VL, Op->getFlags());
6931 if (IsOneBitMask)
6932 return DAG.getSetCC(DL, VT, FPCLASS,
6933 DAG.getConstant(TDCMask, DL, DstVT),
6934 ISD::SETEQ);
6935 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6936 DAG.getConstant(TDCMask, DL, DstVT));
6937 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6938 ISD::SETNE);
6939 }
6940
6941 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6942 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6943 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6944 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6945 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6946 Mask = Op.getOperand(2);
6947 MVT MaskContainerVT =
6948 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6949 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6950 VL = Op.getOperand(3);
6951 }
6952 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6953
6954 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6955 Mask, VL, Op->getFlags());
6956
6957 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6958 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6959 if (IsOneBitMask) {
6960 SDValue VMSEQ =
6961 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6962 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6963 DAG.getUNDEF(ContainerVT), Mask, VL});
6964 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6965 }
6966 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6967 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6968
6969 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6970 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6971 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6972
6973 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6974 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6975 DAG.getUNDEF(ContainerVT), Mask, VL});
6976 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6977 }
6978
6979 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6980 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6981 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6982 ISD::SETNE);
6983 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6984}
6985
6986// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6987// operations propagate nans.
6988static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6989 const RISCVSubtarget &Subtarget) {
6990 SDLoc DL(Op);
6991 MVT VT = Op.getSimpleValueType();
6992
6993 SDValue X = Op.getOperand(0);
6994 SDValue Y = Op.getOperand(1);
6995
6996 if (!VT.isVector()) {
6997 MVT XLenVT = Subtarget.getXLenVT();
6998
6999 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
7000 // ensures that when one input is a nan, the other will also be a nan
7001 // allowing the nan to propagate. If both inputs are nan, this will swap the
7002 // inputs which is harmless.
7003
7004 SDValue NewY = Y;
7005 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
7006 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
7007 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
7008 }
7009
7010 SDValue NewX = X;
7011 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
7012 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
7013 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
7014 }
7015
7016 unsigned Opc =
7017 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
7018 return DAG.getNode(Opc, DL, VT, NewX, NewY);
7019 }
7020
7021 // Check for no-NaNs before converting the fixed-length vectors to scalable vectors.
7022 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
7023 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
7024
7025 MVT ContainerVT = VT;
7026 if (VT.isFixedLengthVector()) {
7027 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
7028 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
7029 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
7030 }
7031
7032 SDValue Mask, VL;
7033 if (Op->isVPOpcode()) {
7034 Mask = Op.getOperand(2);
7035 if (VT.isFixedLengthVector())
7036 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
7037 Subtarget);
7038 VL = Op.getOperand(3);
7039 } else {
7040 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7041 }
7042
7043 SDValue NewY = Y;
7044 if (!XIsNeverNan) {
7045 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
7046 {X, X, DAG.getCondCode(ISD::SETOEQ),
7047 DAG.getUNDEF(ContainerVT), Mask, VL});
7048 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
7049 DAG.getUNDEF(ContainerVT), VL);
7050 }
7051
7052 SDValue NewX = X;
7053 if (!YIsNeverNan) {
7054 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
7055 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
7056 DAG.getUNDEF(ContainerVT), Mask, VL});
7057 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
7058 DAG.getUNDEF(ContainerVT), VL);
7059 }
7060
7061 unsigned Opc =
7062 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
7063 ? RISCVISD::VFMAX_VL
7064 : RISCVISD::VFMIN_VL;
7065 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
7066 DAG.getUNDEF(ContainerVT), Mask, VL);
7067 if (VT.isFixedLengthVector())
7068 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
7069 return Res;
7070}
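// Illustrative trace of the scalar path above: for fmaximum(NaN, 1.0), X is
// not known non-NaN, so XIsNonNan = (X == X) is false and NewY becomes X
// (the NaN); the constant 1.0 is known non-NaN, so NewX stays X.
// RISCVISD::FMAX then sees two NaN operands and returns a NaN, giving
// fmaximum's NaN-propagating semantics, whereas a plain fmax would return 1.0.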
7071
7072static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
7073 const RISCVSubtarget &Subtarget) {
7074 bool IsFABS = Op.getOpcode() == ISD::FABS;
7075 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
7076 "Wrong opcode for lowering FABS or FNEG.");
7077
7078 MVT XLenVT = Subtarget.getXLenVT();
7079 MVT VT = Op.getSimpleValueType();
7080 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7081
7082 SDLoc DL(Op);
7083 SDValue Fmv =
7084 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
7085
7086 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
7087 Mask = Mask.sext(Subtarget.getXLen());
7088
7089 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
7090 SDValue Logic =
7091 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
7092 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
7093}
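// Illustrative expansion of the lowering above (f16 with Zfhmin but without
// Zfh assumed): fneg becomes fmv.x.h; xor with the sign mask 0x8000
// sign-extended to XLEN; fmv.h.x. fabs instead ands with 0x7fff, clearing
// the sign bit.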
7094
7095static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
7096 const RISCVSubtarget &Subtarget) {
7097 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
7098
7099 MVT XLenVT = Subtarget.getXLenVT();
7100 MVT VT = Op.getSimpleValueType();
7101 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7102
7103 SDValue Mag = Op.getOperand(0);
7104 SDValue Sign = Op.getOperand(1);
7105
7106 SDLoc DL(Op);
7107
7108 // Get sign bit into an integer value.
7109 unsigned SignSize = Sign.getValueSizeInBits();
7110 SDValue SignAsInt = [&]() {
7111 if (SignSize == Subtarget.getXLen())
7112 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7113 switch (SignSize) {
7114 case 16:
7115 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7116 case 32:
7117 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7118 case 64: {
7119 assert(XLenVT == MVT::i32 && "Unexpected type");
7120 // Copy the upper word to integer.
7121 SignSize = 32;
7122 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7123 .getValue(1);
7124 }
7125 default:
7126 llvm_unreachable("Unexpected sign size");
7127 }
7128 }();
7129
7130 // Get the signbit at the right position for MagAsInt.
7131 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7132 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7133 SignAsInt,
7134 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
7135
7136 // Mask the sign bit and any bits above it. The extra bits will be dropped
7137 // when we convert back to FP.
7138 SDValue SignMask = DAG.getConstant(
7139 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7140 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7141
7142 // Transform Mag value to integer, and clear the sign bit.
7143 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7144 SDValue ClearSignMask = DAG.getConstant(
7145 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7146 SDValue ClearedSign =
7147 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7148
7149 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
7150 SDNodeFlags::Disjoint);
7151
7152 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7153}
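// Illustrative walk-through of the lowering above (assumed operands, RV64):
// for copysign with a bf16 magnitude and an f32 sign, SignSize is 32, so the
// sign word is shifted right by 32 - 16 = 16 to line its sign bit up with
// bit 15, masked with 0x8000, and OR'd into the sign-cleared
// FMV_X_ANYEXTH value of Mag before converting back with FMV_H_X.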
7154
7155/// Get a RISC-V target specified VL op for a given SDNode.
7156static unsigned getRISCVVLOp(SDValue Op) {
7157#define OP_CASE(NODE) \
7158 case ISD::NODE: \
7159 return RISCVISD::NODE##_VL;
7160#define VP_CASE(NODE) \
7161 case ISD::VP_##NODE: \
7162 return RISCVISD::NODE##_VL;
7163 // clang-format off
7164 switch (Op.getOpcode()) {
7165 default:
7166 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7167 OP_CASE(ADD)
7168 OP_CASE(SUB)
7169 OP_CASE(MUL)
7170 OP_CASE(MULHS)
7171 OP_CASE(MULHU)
7172 OP_CASE(SDIV)
7173 OP_CASE(SREM)
7174 OP_CASE(UDIV)
7175 OP_CASE(UREM)
7176 OP_CASE(SHL)
7177 OP_CASE(SRA)
7178 OP_CASE(SRL)
7179 OP_CASE(ROTL)
7180 OP_CASE(ROTR)
7181 OP_CASE(BSWAP)
7182 OP_CASE(CTTZ)
7183 OP_CASE(CTLZ)
7184 OP_CASE(CTPOP)
7185 OP_CASE(BITREVERSE)
7186 OP_CASE(SADDSAT)
7187 OP_CASE(UADDSAT)
7188 OP_CASE(SSUBSAT)
7189 OP_CASE(USUBSAT)
7190 OP_CASE(AVGFLOORS)
7191 OP_CASE(AVGFLOORU)
7192 OP_CASE(AVGCEILS)
7193 OP_CASE(AVGCEILU)
7194 OP_CASE(FADD)
7195 OP_CASE(FSUB)
7196 OP_CASE(FMUL)
7197 OP_CASE(FDIV)
7198 OP_CASE(FNEG)
7199 OP_CASE(FABS)
7200 OP_CASE(FCOPYSIGN)
7201 OP_CASE(FSQRT)
7202 OP_CASE(SMIN)
7203 OP_CASE(SMAX)
7204 OP_CASE(UMIN)
7205 OP_CASE(UMAX)
7206 OP_CASE(STRICT_FADD)
7207 OP_CASE(STRICT_FSUB)
7208 OP_CASE(STRICT_FMUL)
7209 OP_CASE(STRICT_FDIV)
7210 OP_CASE(STRICT_FSQRT)
7211 VP_CASE(ADD) // VP_ADD
7212 VP_CASE(SUB) // VP_SUB
7213 VP_CASE(MUL) // VP_MUL
7214 VP_CASE(SDIV) // VP_SDIV
7215 VP_CASE(SREM) // VP_SREM
7216 VP_CASE(UDIV) // VP_UDIV
7217 VP_CASE(UREM) // VP_UREM
7218 VP_CASE(SHL) // VP_SHL
7219 VP_CASE(FADD) // VP_FADD
7220 VP_CASE(FSUB) // VP_FSUB
7221 VP_CASE(FMUL) // VP_FMUL
7222 VP_CASE(FDIV) // VP_FDIV
7223 VP_CASE(FNEG) // VP_FNEG
7224 VP_CASE(FABS) // VP_FABS
7225 VP_CASE(SMIN) // VP_SMIN
7226 VP_CASE(SMAX) // VP_SMAX
7227 VP_CASE(UMIN) // VP_UMIN
7228 VP_CASE(UMAX) // VP_UMAX
7229 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7230 VP_CASE(SETCC) // VP_SETCC
7231 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7232 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7233 VP_CASE(BITREVERSE) // VP_BITREVERSE
7234 VP_CASE(SADDSAT) // VP_SADDSAT
7235 VP_CASE(UADDSAT) // VP_UADDSAT
7236 VP_CASE(SSUBSAT) // VP_SSUBSAT
7237 VP_CASE(USUBSAT) // VP_USUBSAT
7238 VP_CASE(BSWAP) // VP_BSWAP
7239 VP_CASE(CTLZ) // VP_CTLZ
7240 VP_CASE(CTTZ) // VP_CTTZ
7241 VP_CASE(CTPOP) // VP_CTPOP
7242 case ISD::CTLZ_ZERO_UNDEF:
7243 case ISD::VP_CTLZ_ZERO_UNDEF:
7244 return RISCVISD::CTLZ_VL;
7245 case ISD::CTTZ_ZERO_UNDEF:
7246 case ISD::VP_CTTZ_ZERO_UNDEF:
7247 return RISCVISD::CTTZ_VL;
7248 case ISD::FMA:
7249 case ISD::VP_FMA:
7250 return RISCVISD::VFMADD_VL;
7251 case ISD::STRICT_FMA:
7252 return RISCVISD::STRICT_VFMADD_VL;
7253 case ISD::AND:
7254 case ISD::VP_AND:
7255 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7256 return RISCVISD::VMAND_VL;
7257 return RISCVISD::AND_VL;
7258 case ISD::OR:
7259 case ISD::VP_OR:
7260 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7261 return RISCVISD::VMOR_VL;
7262 return RISCVISD::OR_VL;
7263 case ISD::XOR:
7264 case ISD::VP_XOR:
7265 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7266 return RISCVISD::VMXOR_VL;
7267 return RISCVISD::XOR_VL;
7268 case ISD::ANY_EXTEND:
7269 case ISD::ZERO_EXTEND:
7270 return RISCVISD::VZEXT_VL;
7271 case ISD::SIGN_EXTEND:
7272 return RISCVISD::VSEXT_VL;
7273 case ISD::SETCC:
7274 return RISCVISD::SETCC_VL;
7275 case ISD::VSELECT:
7276 return RISCVISD::VMERGE_VL;
7277 case ISD::VP_SELECT:
7278 case ISD::VP_MERGE:
7279 return RISCVISD::VMERGE_VL;
7280 case ISD::VP_SRA:
7281 return RISCVISD::SRA_VL;
7282 case ISD::VP_SRL:
7283 return RISCVISD::SRL_VL;
7284 case ISD::VP_SQRT:
7285 return RISCVISD::FSQRT_VL;
7286 case ISD::VP_SIGN_EXTEND:
7287 return RISCVISD::VSEXT_VL;
7288 case ISD::VP_ZERO_EXTEND:
7289 return RISCVISD::VZEXT_VL;
7290 case ISD::VP_FP_TO_SINT:
7291 return RISCVISD::VFCVT_RTZ_X_F_VL;
7292 case ISD::VP_FP_TO_UINT:
7293 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7294 case ISD::FMINNUM:
7295 case ISD::FMINIMUMNUM:
7296 case ISD::VP_FMINNUM:
7297 return RISCVISD::VFMIN_VL;
7298 case ISD::FMAXNUM:
7299 case ISD::FMAXIMUMNUM:
7300 case ISD::VP_FMAXNUM:
7301 return RISCVISD::VFMAX_VL;
7302 case ISD::LRINT:
7303 case ISD::VP_LRINT:
7304 case ISD::LLRINT:
7305 case ISD::VP_LLRINT:
7306 return RISCVISD::VFCVT_RM_X_F_VL;
7307 }
7308 // clang-format on
7309#undef OP_CASE
7310#undef VP_CASE
7311}
7312
7313static bool isPromotedOpNeedingSplit(SDValue Op,
7314 const RISCVSubtarget &Subtarget) {
7315 return (Op.getValueType() == MVT::nxv32f16 &&
7316 (Subtarget.hasVInstructionsF16Minimal() &&
7317 !Subtarget.hasVInstructionsF16())) ||
7318 (Op.getValueType() == MVT::nxv32bf16 &&
7319 Subtarget.hasVInstructionsBF16Minimal() &&
7320 (!Subtarget.hasVInstructionsBF16() ||
7321 (!llvm::is_contained(ZvfbfaOps, Op.getOpcode()) &&
7322 !llvm::is_contained(ZvfbfaVPOps, Op.getOpcode()))));
7323}
7324
7325static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
7326 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7327 SDLoc DL(Op);
7328
7329 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7330 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7331
7332 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7333 if (!Op.getOperand(j).getValueType().isVector()) {
7334 LoOperands[j] = Op.getOperand(j);
7335 HiOperands[j] = Op.getOperand(j);
7336 continue;
7337 }
7338 std::tie(LoOperands[j], HiOperands[j]) =
7339 DAG.SplitVector(Op.getOperand(j), DL);
7340 }
7341
7342 SDValue LoRes =
7343 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7344 SDValue HiRes =
7345 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7346
7347 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7348}
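// Illustrative use of the splitting helper above (assumed type): an FADD on
// nxv32f16 with only zvfhmin-style support is split into two FADDs on the
// nxv16f16 halves reported by GetSplitDestVTs, with scalar operands
// duplicated into both halves, and the results rejoined by CONCAT_VECTORS.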
7349
7350static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
7351 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7352 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7353 SDLoc DL(Op);
7354
7355 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7356 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7357
7358 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7359 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7360 std::tie(LoOperands[j], HiOperands[j]) =
7361 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7362 continue;
7363 }
7364 if (!Op.getOperand(j).getValueType().isVector()) {
7365 LoOperands[j] = Op.getOperand(j);
7366 HiOperands[j] = Op.getOperand(j);
7367 continue;
7368 }
7369 std::tie(LoOperands[j], HiOperands[j]) =
7370 DAG.SplitVector(Op.getOperand(j), DL);
7371 }
7372
7373 SDValue LoRes =
7374 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7375 SDValue HiRes =
7376 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7377
7378 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7379}
7380
7381static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
7382 SDLoc DL(Op);
7383
7384 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7385 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7386 auto [EVLLo, EVLHi] =
7387 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7388
7389 SDValue ResLo =
7390 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7391 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7392 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7393 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7394}
7395
7396static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
7397
7398 assert(Op->isStrictFPOpcode());
7399
7400 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7401
7402 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7403 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7404
7405 SDLoc DL(Op);
7406
7407 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7408 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7409
7410 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7411 if (!Op.getOperand(j).getValueType().isVector()) {
7412 LoOperands[j] = Op.getOperand(j);
7413 HiOperands[j] = Op.getOperand(j);
7414 continue;
7415 }
7416 std::tie(LoOperands[j], HiOperands[j]) =
7417 DAG.SplitVector(Op.getOperand(j), DL);
7418 }
7419
7420 SDValue LoRes =
7421 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7422 HiOperands[0] = LoRes.getValue(1);
7423 SDValue HiRes =
7424 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7425
7426 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7427 LoRes.getValue(0), HiRes.getValue(0));
7428 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7429}
7430
7431SDValue
7432RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7433 SelectionDAG &DAG) const {
7434 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7435 "Unexpected bfloat16 load lowering");
7436
7437 SDLoc DL(Op);
7438 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7439 EVT MemVT = LD->getMemoryVT();
7440 SDValue Load = DAG.getExtLoad(
7441 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7442 LD->getBasePtr(),
7443 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
7444 LD->getMemOperand());
7445 // Use a mask to make the bf16 value NaN-boxed when we don't have the flh
7446 // instruction. -65536 is treated as a small negative number and thus can be
7447 // materialized directly with a single lui to get the constant.
7448 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7449 SDValue OrSixteenOne =
7450 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7451 SDValue ConvertedResult =
7452 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7453 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7454}
7455
7456SDValue
7457RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7458 SelectionDAG &DAG) const {
7459 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7460 "Unexpected bfloat16 store lowering");
7461
7462 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7463 SDLoc DL(Op);
7464 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7465 Subtarget.getXLenVT(), ST->getValue());
7466 return DAG.getTruncStore(
7467 ST->getChain(), DL, FMV, ST->getBasePtr(),
7468 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7469 ST->getMemOperand());
7470}
7471
7472SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
7473 SelectionDAG &DAG) const {
7474 switch (Op.getOpcode()) {
7475 default:
7476 reportFatalInternalError(
7477 "Unimplemented RISCVTargetLowering::LowerOperation Case");
7478 case ISD::PREFETCH:
7479 return LowerPREFETCH(Op, Subtarget, DAG);
7480 case ISD::ATOMIC_FENCE:
7481 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7482 case ISD::GlobalAddress:
7483 return lowerGlobalAddress(Op, DAG);
7484 case ISD::BlockAddress:
7485 return lowerBlockAddress(Op, DAG);
7486 case ISD::ConstantPool:
7487 return lowerConstantPool(Op, DAG);
7488 case ISD::JumpTable:
7489 return lowerJumpTable(Op, DAG);
7490 case ISD::GlobalTLSAddress:
7491 return lowerGlobalTLSAddress(Op, DAG);
7492 case ISD::Constant:
7493 return lowerConstant(Op, DAG, Subtarget);
7494 case ISD::ConstantFP:
7495 return lowerConstantFP(Op, DAG);
7496 case ISD::SELECT:
7497 return lowerSELECT(Op, DAG);
7498 case ISD::BRCOND:
7499 return lowerBRCOND(Op, DAG);
7500 case ISD::VASTART:
7501 return lowerVASTART(Op, DAG);
7502 case ISD::FRAMEADDR:
7503 return lowerFRAMEADDR(Op, DAG);
7504 case ISD::RETURNADDR:
7505 return lowerRETURNADDR(Op, DAG);
7506 case ISD::SHL_PARTS:
7507 return lowerShiftLeftParts(Op, DAG);
7508 case ISD::SRA_PARTS:
7509 return lowerShiftRightParts(Op, DAG, true);
7510 case ISD::SRL_PARTS:
7511 return lowerShiftRightParts(Op, DAG, false);
7512 case ISD::ROTL:
7513 case ISD::ROTR:
7514 if (Op.getValueType().isFixedLengthVector()) {
7515 assert(Subtarget.hasStdExtZvkb());
7516 return lowerToScalableOp(Op, DAG);
7517 }
7518 assert(Subtarget.hasVendorXTHeadBb() &&
7519 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7520 "Unexpected custom legalization");
7521 // XTHeadBb only supports rotate by constant.
7522 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7523 return SDValue();
7524 return Op;
7525 case ISD::BITCAST: {
7526 SDLoc DL(Op);
7527 EVT VT = Op.getValueType();
7528 SDValue Op0 = Op.getOperand(0);
7529 EVT Op0VT = Op0.getValueType();
7530 MVT XLenVT = Subtarget.getXLenVT();
7531 if (Op0VT == MVT::i16 &&
7532 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7533 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7534 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7535 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7536 }
7537 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7538 Subtarget.hasStdExtFOrZfinx()) {
7539 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7540 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7541 }
7542 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7543 Subtarget.hasStdExtDOrZdinx()) {
7544 SDValue Lo, Hi;
7545 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7546 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7547 }
7548
7549 // Consider other scalar<->scalar casts as legal if the types are legal.
7550 // Otherwise expand them.
7551 if (!VT.isVector() && !Op0VT.isVector()) {
7552 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7553 return Op;
7554 return SDValue();
7555 }
7556
7557 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7558 "Unexpected types");
7559
7560 if (VT.isFixedLengthVector()) {
7561 // We can handle fixed length vector bitcasts with a simple replacement
7562 // in isel.
7563 if (Op0VT.isFixedLengthVector())
7564 return Op;
7565 // When bitcasting from scalar to fixed-length vector, insert the scalar
7566 // into a one-element vector of the result type, and perform a vector
7567 // bitcast.
7568 if (!Op0VT.isVector()) {
7569 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7570 if (!isTypeLegal(BVT))
7571 return SDValue();
7572 return DAG.getBitcast(
7573 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7574 }
7575 return SDValue();
7576 }
7577 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7578 // thus: bitcast the vector to a one-element vector type whose element type
7579 // is the same as the result type, and extract the first element.
7580 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7581 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7582 if (!isTypeLegal(BVT))
7583 return SDValue();
7584 SDValue BVec = DAG.getBitcast(BVT, Op0);
7585 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
7586 }
7587 return SDValue();
7588 }
7589 case ISD::INTRINSIC_WO_CHAIN:
7590 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7591 case ISD::INTRINSIC_W_CHAIN:
7592 return LowerINTRINSIC_W_CHAIN(Op, DAG);
7593 case ISD::INTRINSIC_VOID:
7594 return LowerINTRINSIC_VOID(Op, DAG);
7595 case ISD::IS_FPCLASS:
7596 return LowerIS_FPCLASS(Op, DAG);
7597 case ISD::BITREVERSE: {
7598 MVT VT = Op.getSimpleValueType();
7599 if (VT.isFixedLengthVector()) {
7600 assert(Subtarget.hasStdExtZvbb());
7601 return lowerToScalableOp(Op, DAG);
7602 }
7603 SDLoc DL(Op);
7604 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7605 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7606 // Expand bitreverse to a bswap(rev8) followed by brev8.
7607 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
7608 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
7609 }
7610 case ISD::TRUNCATE:
7611 case ISD::TRUNCATE_SSAT_S:
7612 case ISD::TRUNCATE_USAT_U:
7613 // Only custom-lower vector truncates
7614 if (!Op.getSimpleValueType().isVector())
7615 return Op;
7616 return lowerVectorTruncLike(Op, DAG);
7617 case ISD::ANY_EXTEND:
7618 case ISD::ZERO_EXTEND:
7619 if (Op.getOperand(0).getValueType().isVector() &&
7620 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7621 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
7622 if (Op.getValueType().isScalableVector())
7623 return Op;
7624 return lowerToScalableOp(Op, DAG);
7625 case ISD::SIGN_EXTEND:
7626 if (Op.getOperand(0).getValueType().isVector() &&
7627 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7628 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
7629 if (Op.getValueType().isScalableVector())
7630 return Op;
7631 return lowerToScalableOp(Op, DAG);
7632 case ISD::SPLAT_VECTOR_PARTS:
7633 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7634 case ISD::INSERT_VECTOR_ELT:
7635 return lowerINSERT_VECTOR_ELT(Op, DAG);
7636 case ISD::EXTRACT_VECTOR_ELT:
7637 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7638 case ISD::SCALAR_TO_VECTOR: {
7639 MVT VT = Op.getSimpleValueType();
7640 SDLoc DL(Op);
7641 SDValue Scalar = Op.getOperand(0);
7642 if (VT.getVectorElementType() == MVT::i1) {
7643 MVT WideVT = VT.changeVectorElementType(MVT::i8);
7644 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
7645 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
7646 }
7647 MVT ContainerVT = VT;
7648 if (VT.isFixedLengthVector())
7649 ContainerVT = getContainerForFixedLengthVector(VT);
7650 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7651
7652 SDValue V;
7653 if (VT.isFloatingPoint()) {
7654 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
7655 DAG.getUNDEF(ContainerVT), Scalar, VL);
7656 } else {
7657 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
7658 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
7659 DAG.getUNDEF(ContainerVT), Scalar, VL);
7660 }
7661 if (VT.isFixedLengthVector())
7662 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7663 return V;
7664 }
7665 case ISD::VSCALE: {
7666 MVT XLenVT = Subtarget.getXLenVT();
7667 MVT VT = Op.getSimpleValueType();
7668 SDLoc DL(Op);
7669 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7670 // We define our scalable vector types for lmul=1 to use a 64 bit known
7671 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
7672 // vscale as VLENB / 8.
7673 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7674 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7675 reportFatalInternalError("Support for VLEN==32 is incomplete.");
7676 // We assume VLENB is a multiple of 8. We manually choose the best shift
7677 // here because SimplifyDemandedBits isn't always able to simplify it.
7678 uint64_t Val = Op.getConstantOperandVal(0);
7679 if (isPowerOf2_64(Val)) {
7680 uint64_t Log2 = Log2_64(Val);
7681 if (Log2 < 3) {
7682 SDNodeFlags Flags;
7683 Flags.setExact(true);
7684 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7685 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
7686 } else if (Log2 > 3) {
7687 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
7688 DAG.getConstant(Log2 - 3, DL, XLenVT));
7689 }
7690 } else if ((Val % 8) == 0) {
7691 // If the multiplier is a multiple of 8, scale it down to avoid needing
7692 // to shift the VLENB value.
7693 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7694 DAG.getConstant(Val / 8, DL, XLenVT));
7695 } else {
7696 SDNodeFlags Flags;
7697 Flags.setExact(true);
7698 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7699 DAG.getConstant(3, DL, XLenVT), Flags);
7700 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7701 DAG.getConstant(Val, DL, XLenVT));
7702 }
7703 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7704 }
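// Illustrative example for the VSCALE lowering above (assumed multiplier):
// vscale * 4 reads VLENB and, since vscale = VLENB / 8 and 4 = 2^2, is
// emitted as a single srli by 3 - 2 = 1 rather than a shift-then-multiply.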
7705 case ISD::FPOWI: {
7706 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7707 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7708 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7709 Op.getOperand(1).getValueType() == MVT::i32) {
7710 SDLoc DL(Op);
7711 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7712 SDValue Powi =
7713 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
7714 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
7715 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7716 }
7717 return SDValue();
7718 }
7719 case ISD::FMAXIMUM:
7720 case ISD::FMINIMUM:
7721 if (isPromotedOpNeedingSplit(Op, Subtarget))
7722 return SplitVectorOp(Op, DAG);
7723 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7724 case ISD::FP_EXTEND:
7725 case ISD::FP_ROUND:
7726 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7727 case ISD::STRICT_FP_ROUND:
7728 case ISD::STRICT_FP_EXTEND:
7729 return lowerStrictFPExtendOrRoundLike(Op, DAG);
7730 case ISD::SINT_TO_FP:
7731 case ISD::UINT_TO_FP:
7732 if (Op.getValueType().isVector() &&
7733 ((Op.getValueType().getScalarType() == MVT::f16 &&
7734 (Subtarget.hasVInstructionsF16Minimal() &&
7735 !Subtarget.hasVInstructionsF16())) ||
7736 Op.getValueType().getScalarType() == MVT::bf16)) {
7737 if (isPromotedOpNeedingSplit(Op, Subtarget))
7738 return SplitVectorOp(Op, DAG);
7739 // int -> f32
7740 SDLoc DL(Op);
7741 MVT NVT =
7742 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7743 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7744 // f32 -> [b]f16
7745 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7746 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7747 }
7748 [[fallthrough]];
7749 case ISD::FP_TO_SINT:
7750 case ISD::FP_TO_UINT:
7751 if (SDValue Op1 = Op.getOperand(0);
7752 Op1.getValueType().isVector() &&
7753 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7754 (Subtarget.hasVInstructionsF16Minimal() &&
7755 !Subtarget.hasVInstructionsF16())) ||
7756 Op1.getValueType().getScalarType() == MVT::bf16)) {
7757 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7758 return SplitVectorOp(Op, DAG);
7759 // [b]f16 -> f32
7760 SDLoc DL(Op);
7761 MVT NVT = MVT::getVectorVT(MVT::f32,
7762 Op1.getValueType().getVectorElementCount());
7763 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7764 // f32 -> int
7765 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
7766 }
7767 [[fallthrough]];
7768 case ISD::STRICT_FP_TO_SINT:
7769 case ISD::STRICT_FP_TO_UINT:
7770 case ISD::STRICT_SINT_TO_FP:
7771 case ISD::STRICT_UINT_TO_FP: {
7772 // RVV can only do fp<->int conversions to types half/double the size as
7773 // the source. We custom-lower any conversions that do two hops into
7774 // sequences.
7775 MVT VT = Op.getSimpleValueType();
7776 if (VT.isScalarInteger())
7777 return lowerFP_TO_INT(Op, DAG, Subtarget);
7778 bool IsStrict = Op->isStrictFPOpcode();
7779 SDValue Src = Op.getOperand(0 + IsStrict);
7780 MVT SrcVT = Src.getSimpleValueType();
7781 if (SrcVT.isScalarInteger())
7782 return lowerINT_TO_FP(Op, DAG, Subtarget);
7783 if (!VT.isVector())
7784 return Op;
7785 SDLoc DL(Op);
7786 MVT EltVT = VT.getVectorElementType();
7787 MVT SrcEltVT = SrcVT.getVectorElementType();
7788 unsigned EltSize = EltVT.getSizeInBits();
7789 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
7790 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
7791 "Unexpected vector element types");
7792
7793 bool IsInt2FP = SrcEltVT.isInteger();
7794 // Widening conversions
7795 if (EltSize > (2 * SrcEltSize)) {
7796 if (IsInt2FP) {
7797 // Do a regular integer sign/zero extension then convert to float.
7798 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
7799 VT.getVectorElementCount());
7800 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
7801 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
7802 ? ISD::ZERO_EXTEND
7803 : ISD::SIGN_EXTEND;
7804 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
7805 if (IsStrict)
7806 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
7807 Op.getOperand(0), Ext);
7808 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
7809 }
7810 // FP2Int
7811 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
7812 // Do one doubling fp_extend then complete the operation by converting
7813 // to int.
7814 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7815 if (IsStrict) {
7816 auto [FExt, Chain] =
7817 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
7818 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
7819 }
7820 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
7821 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
7822 }
7823
7824 // Narrowing conversions
7825 if (SrcEltSize > (2 * EltSize)) {
7826 if (IsInt2FP) {
7827 // One narrowing int_to_fp, then an fp_round.
7828 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7829 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7830 if (IsStrict) {
7831 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7832 DAG.getVTList(InterimFVT, MVT::Other),
7833 Op.getOperand(0), Src);
7834 SDValue Chain = Int2FP.getValue(1);
7835 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7836 }
7837 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7838 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7839 }
7840 // FP2Int
7841 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7842 // representable by the integer, the result is poison.
7843 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7844 VT.getVectorElementCount());
7845 if (IsStrict) {
7846 SDValue FP2Int =
7847 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7848 Op.getOperand(0), Src);
7849 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7850 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7851 }
7852 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7853 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7854 }
7855
7856 // Scalable vectors can exit here. Patterns will handle equally-sized
7857 // conversions halving/doubling ones.
7858 if (!VT.isFixedLengthVector())
7859 return Op;
7860
7861 // For fixed-length vectors we lower to a custom "VL" node.
7862 unsigned RVVOpc = 0;
7863 switch (Op.getOpcode()) {
7864 default:
7865 llvm_unreachable("Impossible opcode");
7866 case ISD::FP_TO_SINT:
7867 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7868 break;
7869 case ISD::FP_TO_UINT:
7870 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7871 break;
7872 case ISD::SINT_TO_FP:
7873 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7874 break;
7875 case ISD::UINT_TO_FP:
7876 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7877 break;
7878 case ISD::STRICT_FP_TO_SINT:
7879 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7880 break;
7881 case ISD::STRICT_FP_TO_UINT:
7882 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7883 break;
7884 case ISD::STRICT_SINT_TO_FP:
7885 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7886 break;
7887 case ISD::STRICT_UINT_TO_FP:
7888 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7889 break;
7890 }
7891
7892 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7893 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7894 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7895 "Expected same element count");
7896
7897 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7898
7899 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7900 if (IsStrict) {
7901 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7902 Op.getOperand(0), Src, Mask, VL);
7903 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7904 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7905 }
7906 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7907 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7908 }
7909 case ISD::FP_TO_SINT_SAT:
7910 case ISD::FP_TO_UINT_SAT:
7911 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7912 case ISD::FP_TO_BF16: {
7913 // Custom lower to ensure the libcall return is passed in an FPR on hard
7914 // float ABIs.
7915 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7916 SDLoc DL(Op);
7917 MakeLibCallOptions CallOptions;
7918 RTLIB::Libcall LC =
7919 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7920 SDValue Res =
7921 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7922 if (Subtarget.is64Bit())
7923 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7924 return DAG.getBitcast(MVT::i32, Res);
7925 }
7926 case ISD::BF16_TO_FP: {
7927 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7928 MVT VT = Op.getSimpleValueType();
7929 SDLoc DL(Op);
7930 Op = DAG.getNode(
7931 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7932 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7933 SDValue Res = Subtarget.is64Bit()
7934 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7935 : DAG.getBitcast(MVT::f32, Op);
7936 // fp_extend if the target VT is bigger than f32.
7937 if (VT != MVT::f32)
7938 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7939 return Res;
7940 }
7941 case ISD::STRICT_FP_TO_FP16:
7942 case ISD::FP_TO_FP16: {
7943 // Custom lower to ensure the libcall return is passed in an FPR on hard
7944 // float ABIs.
7945 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7946 SDLoc DL(Op);
7947 MakeLibCallOptions CallOptions;
7948 bool IsStrict = Op->isStrictFPOpcode();
7949 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7950 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7951 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7952 SDValue Res;
7953 std::tie(Res, Chain) =
7954 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7955 if (Subtarget.is64Bit())
7956 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7957 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7958 if (IsStrict)
7959 return DAG.getMergeValues({Result, Chain}, DL);
7960 return Result;
7961 }
7962 case ISD::STRICT_FP16_TO_FP:
7963 case ISD::FP16_TO_FP: {
7964 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7965 // float ABIs.
7966 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7967 SDLoc DL(Op);
7968 MakeLibCallOptions CallOptions;
7969 bool IsStrict = Op->isStrictFPOpcode();
7970 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7971 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7972 SDValue Arg = Subtarget.is64Bit()
7973 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7974 : DAG.getBitcast(MVT::f32, Op0);
7975 SDValue Res;
7976 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7977 CallOptions, DL, Chain);
7978 if (IsStrict)
7979 return DAG.getMergeValues({Res, Chain}, DL);
7980 return Res;
7981 }
7982 case ISD::FTRUNC:
7983 case ISD::FCEIL:
7984 case ISD::FFLOOR:
7985 case ISD::FNEARBYINT:
7986 case ISD::FRINT:
7987 case ISD::FROUND:
7988 case ISD::FROUNDEVEN:
7989 if (isPromotedOpNeedingSplit(Op, Subtarget))
7990 return SplitVectorOp(Op, DAG);
7991 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7992 case ISD::LRINT:
7993 case ISD::LLRINT:
7994 case ISD::LROUND:
7995 case ISD::LLROUND: {
7996 if (Op.getValueType().isVector())
7997 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
7998 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7999 "Unexpected custom legalisation");
8000 SDLoc DL(Op);
8001 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
8002 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
8003 }
8004 case ISD::STRICT_LRINT:
8005 case ISD::STRICT_LLRINT:
8006 case ISD::STRICT_LROUND:
8007 case ISD::STRICT_LLROUND: {
8008 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
8009 "Unexpected custom legalisation");
8010 SDLoc DL(Op);
8011 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
8012 {Op.getOperand(0), Op.getOperand(1)});
8013 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
8014 {Ext.getValue(1), Ext.getValue(0)});
8015 }
8016 case ISD::VECREDUCE_ADD:
8017 case ISD::VECREDUCE_UMAX:
8018 case ISD::VECREDUCE_SMAX:
8019 case ISD::VECREDUCE_UMIN:
8020 case ISD::VECREDUCE_SMIN:
8021 return lowerVECREDUCE(Op, DAG);
8022 case ISD::VECREDUCE_AND:
8023 case ISD::VECREDUCE_OR:
8024 case ISD::VECREDUCE_XOR:
8025 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
8026 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
8027 return lowerVECREDUCE(Op, DAG);
8028 case ISD::VECREDUCE_FADD:
8029 case ISD::VECREDUCE_SEQ_FADD:
8030 case ISD::VECREDUCE_FMIN:
8031 case ISD::VECREDUCE_FMAX:
8032 case ISD::VECREDUCE_FMAXIMUM:
8033 case ISD::VECREDUCE_FMINIMUM:
8034 return lowerFPVECREDUCE(Op, DAG);
8035 case ISD::VP_REDUCE_ADD:
8036 case ISD::VP_REDUCE_UMAX:
8037 case ISD::VP_REDUCE_SMAX:
8038 case ISD::VP_REDUCE_UMIN:
8039 case ISD::VP_REDUCE_SMIN:
8040 case ISD::VP_REDUCE_FADD:
8041 case ISD::VP_REDUCE_SEQ_FADD:
8042 case ISD::VP_REDUCE_FMIN:
8043 case ISD::VP_REDUCE_FMAX:
8044 case ISD::VP_REDUCE_FMINIMUM:
8045 case ISD::VP_REDUCE_FMAXIMUM:
8046 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
8047 return SplitVectorReductionOp(Op, DAG);
8048 return lowerVPREDUCE(Op, DAG);
8049 case ISD::VP_REDUCE_AND:
8050 case ISD::VP_REDUCE_OR:
8051 case ISD::VP_REDUCE_XOR:
8052 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
8053 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
8054 return lowerVPREDUCE(Op, DAG);
8055 case ISD::VP_CTTZ_ELTS:
8056 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
8057 return lowerVPCttzElements(Op, DAG);
8058 case ISD::UNDEF: {
8059 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
8060 return convertFromScalableVector(Op.getSimpleValueType(),
8061 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
8062 }
8063 case ISD::INSERT_SUBVECTOR:
8064 return lowerINSERT_SUBVECTOR(Op, DAG);
8065 case ISD::EXTRACT_SUBVECTOR:
8066 return lowerEXTRACT_SUBVECTOR(Op, DAG);
8067 case ISD::VECTOR_DEINTERLEAVE:
8068 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
8069 case ISD::VECTOR_INTERLEAVE:
8070 return lowerVECTOR_INTERLEAVE(Op, DAG);
8071 case ISD::STEP_VECTOR:
8072 return lowerSTEP_VECTOR(Op, DAG);
8073 case ISD::VECTOR_REVERSE:
8074 return lowerVECTOR_REVERSE(Op, DAG);
8075 case ISD::VECTOR_SPLICE:
8076 return lowerVECTOR_SPLICE(Op, DAG);
8077 case ISD::BUILD_VECTOR: {
8078 MVT VT = Op.getSimpleValueType();
8079 MVT EltVT = VT.getVectorElementType();
8080 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
8081 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
8082 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
8083 }
8084 case ISD::SPLAT_VECTOR: {
8085 MVT VT = Op.getSimpleValueType();
8086 MVT EltVT = VT.getVectorElementType();
8087 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
8088 EltVT == MVT::bf16) {
8089 SDLoc DL(Op);
8090 SDValue Elt;
8091 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
8092 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
8093 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
8094 Op.getOperand(0));
8095 else
8096 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
8097 MVT IVT = VT.changeVectorElementType(MVT::i16);
8098 return DAG.getNode(ISD::BITCAST, DL, VT,
8099 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
8100 }
8101
8102 if (EltVT == MVT::i1)
8103 return lowerVectorMaskSplat(Op, DAG);
8104 return SDValue();
8105 }
8106 case ISD::VECTOR_SHUFFLE:
8107 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8108 case ISD::CONCAT_VECTORS: {
8109 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8110 // better than going through the stack, as the default expansion does.
8111 SDLoc DL(Op);
8112 MVT VT = Op.getSimpleValueType();
8113 MVT ContainerVT = VT;
8114 if (VT.isFixedLengthVector())
8115 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8116
8117 // Recursively split concat_vectors with more than 2 operands:
8118 //
8119 // concat_vector op1, op2, op3, op4
8120 // ->
8121 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8122 //
8123 // This reduces the length of the chain of vslideups and allows us to
8124 // perform the vslideups at a smaller LMUL, limited to MF2.
8125 if (Op.getNumOperands() > 2 &&
8126 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8127 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8128 assert(isPowerOf2_32(Op.getNumOperands()));
8129 size_t HalfNumOps = Op.getNumOperands() / 2;
8130 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8131 Op->ops().take_front(HalfNumOps));
8132 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8133 Op->ops().drop_front(HalfNumOps));
8134 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8135 }
8136
8137 unsigned NumOpElts =
8138 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8139 SDValue Vec = DAG.getUNDEF(VT);
8140 for (const auto &OpIdx : enumerate(Op->ops())) {
8141 SDValue SubVec = OpIdx.value();
8142 // Don't insert undef subvectors.
8143 if (SubVec.isUndef())
8144 continue;
8145 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8146 }
8147 return Vec;
8148 }
8149 case ISD::LOAD: {
8150 auto *Load = cast<LoadSDNode>(Op);
8151 EVT VT = Load->getValueType(0);
8152 if (VT == MVT::f64) {
8153 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8154 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8155
8156 // Replace a double precision load with two i32 loads and a BuildPairF64.
8157 SDLoc DL(Op);
8158 SDValue BasePtr = Load->getBasePtr();
8159 SDValue Chain = Load->getChain();
8160
8161 SDValue Lo =
8162 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8163 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8164 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8165 SDValue Hi = DAG.getLoad(
8166 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8167 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8168 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8169 Hi.getValue(1));
8170
8171 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8172 return DAG.getMergeValues({Pair, Chain}, DL);
8173 }
8174
8175 if (VT == MVT::bf16)
8176 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8177
8178 // Handle normal vector tuple load.
8179 if (VT.isRISCVVectorTuple()) {
8180 SDLoc DL(Op);
8181 MVT XLenVT = Subtarget.getXLenVT();
8182 unsigned NF = VT.getRISCVVectorTupleNumFields();
8183 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8184 unsigned NumElts = Sz / (NF * 8);
8185 int Log2LMUL = Log2_64(NumElts) - 3;
8186
8187 auto Flag = SDNodeFlags();
8188 Flag.setNoUnsignedWrap(true);
8189 SDValue Ret = DAG.getUNDEF(VT);
8190 SDValue BasePtr = Load->getBasePtr();
8191 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8192 VROffset =
8193 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8194 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8195 SmallVector<SDValue, 8> OutChains;
8196
8197 // Load NF vector registers and combine them to a vector tuple.
8198 for (unsigned i = 0; i < NF; ++i) {
8199 SDValue LoadVal = DAG.getLoad(
8200 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8201 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8202 OutChains.push_back(LoadVal.getValue(1));
8203 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8204 DAG.getTargetConstant(i, DL, MVT::i32));
8205 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8206 }
8207 return DAG.getMergeValues(
8208 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8209 }
8210
8211 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8212 return V;
8213 if (Op.getValueType().isFixedLengthVector())
8214 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8215 return Op;
8216 }
8217 case ISD::STORE: {
8218 auto *Store = cast<StoreSDNode>(Op);
8219 SDValue StoredVal = Store->getValue();
8220 EVT VT = StoredVal.getValueType();
8221 if (VT == MVT::f64) {
8222 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8223 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8224
8225 // Replace a double precision store with a SplitF64 and i32 stores.
8226 SDLoc DL(Op);
8227 SDValue BasePtr = Store->getBasePtr();
8228 SDValue Chain = Store->getChain();
8229 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8230 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8231
8232 SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
8233 Store->getPointerInfo(), Store->getBaseAlign(),
8234 Store->getMemOperand()->getFlags());
8235 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8236 SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
8237 Store->getPointerInfo().getWithOffset(4),
8238 Store->getBaseAlign(),
8239 Store->getMemOperand()->getFlags());
8240 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8241 }
8242 if (VT == MVT::i64) {
8243 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8244 "Unexpected custom legalisation");
8245 if (Store->isTruncatingStore())
8246 return SDValue();
8247
8248 if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
8249 return SDValue();
8250
8251 SDLoc DL(Op);
8252 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8253 DAG.getTargetConstant(0, DL, MVT::i32));
8254 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8255 DAG.getTargetConstant(1, DL, MVT::i32));
8256
8257 return DAG.getMemIntrinsicNode(
8258 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8259 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8260 Store->getMemOperand());
8261 }
8262
8263 if (VT == MVT::bf16)
8264 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8265
8266 // Handle normal vector tuple store.
8267 if (VT.isRISCVVectorTuple()) {
8268 SDLoc DL(Op);
8269 MVT XLenVT = Subtarget.getXLenVT();
8270 unsigned NF = VT.getRISCVVectorTupleNumFields();
8271 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8272 unsigned NumElts = Sz / (NF * 8);
8273 int Log2LMUL = Log2_64(NumElts) - 3;
8274
8275 auto Flag = SDNodeFlags();
8276 Flag.setNoUnsignedWrap(true);
8277 SDValue Ret;
8278 SDValue Chain = Store->getChain();
8279 SDValue BasePtr = Store->getBasePtr();
8280 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8281 VROffset =
8282 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8283 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8284
8285 // Extract subregisters in a vector tuple and store them individually.
8286 for (unsigned i = 0; i < NF; ++i) {
8287 auto Extract =
8288 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8289 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8290 DAG.getTargetConstant(i, DL, MVT::i32));
8291 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8292 MachinePointerInfo(Store->getAddressSpace()),
8293 Store->getBaseAlign(),
8294 Store->getMemOperand()->getFlags());
8295 Chain = Ret.getValue(0);
8296 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8297 }
8298 return Ret;
8299 }
8300
8301 if (auto V = expandUnalignedRVVStore(Op, DAG))
8302 return V;
8303 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8304 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8305 return Op;
8306 }
8307 case ISD::MLOAD:
8308 case ISD::VP_LOAD:
8309 return lowerMaskedLoad(Op, DAG);
8310 case ISD::VP_LOAD_FF:
8311 return lowerLoadFF(Op, DAG);
8312 case ISD::MSTORE:
8313 case ISD::VP_STORE:
8314 return lowerMaskedStore(Op, DAG);
8315 case ISD::VECTOR_COMPRESS:
8316 return lowerVectorCompress(Op, DAG);
8317 case ISD::SELECT_CC: {
8318 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8319 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8320 // into separate SETCC+SELECT just like LegalizeDAG.
8321 SDValue Tmp1 = Op.getOperand(0);
8322 SDValue Tmp2 = Op.getOperand(1);
8323 SDValue True = Op.getOperand(2);
8324 SDValue False = Op.getOperand(3);
8325 EVT VT = Op.getValueType();
8326 SDValue CC = Op.getOperand(4);
8327 EVT CmpVT = Tmp1.getValueType();
8328 EVT CCVT =
8329 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8330 SDLoc DL(Op);
8331 SDValue Cond =
8332 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8333 return DAG.getSelect(DL, VT, Cond, True, False);
8334 }
8335 case ISD::SETCC: {
8336 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8337 if (OpVT.isScalarInteger()) {
8338 MVT VT = Op.getSimpleValueType();
8339 SDValue LHS = Op.getOperand(0);
8340 SDValue RHS = Op.getOperand(1);
8341 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8342 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8343 "Unexpected CondCode");
8344
8345 SDLoc DL(Op);
8346
8347 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8348 // convert this to the equivalent of (set(u)ge X, C+1) by using
8349 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8350 // in a register.
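// For example, (setgt X, 5) becomes (xori (slti X, 6), 1), i.e. !(X < 6).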
8351 if (isa<ConstantSDNode>(RHS)) {
8352 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8353 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8354 // If this is an unsigned compare and the constant is -1, incrementing
8355 // the constant would change behavior. The result should be false.
8356 if (CCVal == ISD::SETUGT && Imm == -1)
8357 return DAG.getConstant(0, DL, VT);
8358 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8359 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8360 SDValue SetCC = DAG.getSetCC(
8361 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8362 return DAG.getLogicalNOT(DL, SetCC, VT);
8363 }
8364 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
8365 if (CCVal == ISD::SETUGT && Imm == 2047) {
8366 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8367 DAG.getShiftAmountConstant(11, OpVT, DL));
8368 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8369 ISD::SETNE);
8370 }
8371 }
8372
8373 // Not a constant we could handle, swap the operands and condition code to
8374 // SETLT/SETULT.
8375 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8376 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8377 }
8378
8379 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8380 return SplitVectorOp(Op, DAG);
8381
8382 return lowerToScalableOp(Op, DAG);
8383 }
8384 case ISD::ADD:
8385 case ISD::SUB:
8386 case ISD::MUL:
8387 case ISD::MULHS:
8388 case ISD::MULHU:
8389 case ISD::AND:
8390 case ISD::OR:
8391 case ISD::XOR:
8392 case ISD::SDIV:
8393 case ISD::SREM:
8394 case ISD::UDIV:
8395 case ISD::UREM:
8396 case ISD::BSWAP:
8397 case ISD::CTPOP:
8398 case ISD::VSELECT:
8399 return lowerToScalableOp(Op, DAG);
8400 case ISD::SHL:
8401 case ISD::SRA:
8402 case ISD::SRL:
8403 if (Op.getSimpleValueType().isFixedLengthVector())
8404 return lowerToScalableOp(Op, DAG);
8405 // This can be called for an i32 shift amount that needs to be promoted.
8406 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8407 "Unexpected custom legalisation");
8408 return SDValue();
8409 case ISD::FABS:
8410 case ISD::FNEG:
8411 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8412 return lowerFABSorFNEG(Op, DAG, Subtarget);
8413 [[fallthrough]];
8414 case ISD::FADD:
8415 case ISD::FSUB:
8416 case ISD::FMUL:
8417 case ISD::FDIV:
8418 case ISD::FSQRT:
8419 case ISD::FMA:
8420 case ISD::FMINNUM:
8421 case ISD::FMAXNUM:
8422 case ISD::FMINIMUMNUM:
8423 case ISD::FMAXIMUMNUM:
8424 if (isPromotedOpNeedingSplit(Op, Subtarget))
8425 return SplitVectorOp(Op, DAG);
8426 [[fallthrough]];
8427 case ISD::AVGFLOORS:
8428 case ISD::AVGFLOORU:
8429 case ISD::AVGCEILS:
8430 case ISD::AVGCEILU:
8431 case ISD::SMIN:
8432 case ISD::SMAX:
8433 case ISD::UMIN:
8434 case ISD::UMAX:
8435 case ISD::UADDSAT:
8436 case ISD::USUBSAT:
8437 case ISD::SADDSAT:
8438 case ISD::SSUBSAT:
8439 return lowerToScalableOp(Op, DAG);
8440 case ISD::ABDS:
8441 case ISD::ABDU: {
8442 SDLoc dl(Op);
8443 EVT VT = Op->getValueType(0);
8444 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8445 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8446 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8447
8448 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8449 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
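// For example, abds(3, 7) = smax(3, 7) - smin(3, 7) = 7 - 3 = 4.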
8450 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8451 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8452 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8453 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8454 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8455 }
8456 case ISD::ABS:
8457 case ISD::VP_ABS:
8458 return lowerABS(Op, DAG);
8459 case ISD::CTLZ:
8460 case ISD::CTLZ_ZERO_UNDEF:
8461 case ISD::CTTZ:
8462 case ISD::CTTZ_ZERO_UNDEF:
8463 if (Subtarget.hasStdExtZvbb())
8464 return lowerToScalableOp(Op, DAG);
8465 assert(Op.getOpcode() != ISD::CTTZ);
8466 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8467 case ISD::FCOPYSIGN:
8468 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8469 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8470 if (isPromotedOpNeedingSplit(Op, Subtarget))
8471 return SplitVectorOp(Op, DAG);
8472 return lowerToScalableOp(Op, DAG);
8473 case ISD::STRICT_FADD:
8474 case ISD::STRICT_FSUB:
8475 case ISD::STRICT_FMUL:
8476 case ISD::STRICT_FDIV:
8477 case ISD::STRICT_FSQRT:
8478 case ISD::STRICT_FMA:
8479 if (isPromotedOpNeedingSplit(Op, Subtarget))
8480 return SplitStrictFPVectorOp(Op, DAG);
8481 return lowerToScalableOp(Op, DAG);
8482 case ISD::STRICT_FSETCC:
8483 case ISD::STRICT_FSETCCS:
8484 return lowerVectorStrictFSetcc(Op, DAG);
8485 case ISD::STRICT_FCEIL:
8486 case ISD::STRICT_FRINT:
8487 case ISD::STRICT_FFLOOR:
8488 case ISD::STRICT_FTRUNC:
8489 case ISD::STRICT_FNEARBYINT:
8490 case ISD::STRICT_FROUND:
8491 case ISD::STRICT_FROUNDEVEN:
8492 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8493 case ISD::MGATHER:
8494 case ISD::VP_GATHER:
8495 return lowerMaskedGather(Op, DAG);
8496 case ISD::MSCATTER:
8497 case ISD::VP_SCATTER:
8498 return lowerMaskedScatter(Op, DAG);
8499 case ISD::GET_ROUNDING:
8500 return lowerGET_ROUNDING(Op, DAG);
8501 case ISD::SET_ROUNDING:
8502 return lowerSET_ROUNDING(Op, DAG);
8503 case ISD::GET_FPENV:
8504 return lowerGET_FPENV(Op, DAG);
8505 case ISD::SET_FPENV:
8506 return lowerSET_FPENV(Op, DAG);
8507 case ISD::RESET_FPENV:
8508 return lowerRESET_FPENV(Op, DAG);
8509 case ISD::GET_FPMODE:
8510 return lowerGET_FPMODE(Op, DAG);
8511 case ISD::SET_FPMODE:
8512 return lowerSET_FPMODE(Op, DAG);
8513 case ISD::RESET_FPMODE:
8514 return lowerRESET_FPMODE(Op, DAG);
8515 case ISD::EH_DWARF_CFA:
8516 return lowerEH_DWARF_CFA(Op, DAG);
8517 case ISD::VP_MERGE:
8518 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8519 return lowerVPMergeMask(Op, DAG);
8520 [[fallthrough]];
8521 case ISD::VP_SELECT:
8522 case ISD::VP_ADD:
8523 case ISD::VP_SUB:
8524 case ISD::VP_MUL:
8525 case ISD::VP_SDIV:
8526 case ISD::VP_UDIV:
8527 case ISD::VP_SREM:
8528 case ISD::VP_UREM:
8529 case ISD::VP_UADDSAT:
8530 case ISD::VP_USUBSAT:
8531 case ISD::VP_SADDSAT:
8532 case ISD::VP_SSUBSAT:
8533 case ISD::VP_LRINT:
8534 case ISD::VP_LLRINT:
8535 return lowerVPOp(Op, DAG);
8536 case ISD::VP_AND:
8537 case ISD::VP_OR:
8538 case ISD::VP_XOR:
8539 return lowerLogicVPOp(Op, DAG);
8540 case ISD::VP_FADD:
8541 case ISD::VP_FSUB:
8542 case ISD::VP_FMUL:
8543 case ISD::VP_FDIV:
8544 case ISD::VP_FNEG:
8545 case ISD::VP_FABS:
8546 case ISD::VP_SQRT:
8547 case ISD::VP_FMA:
8548 case ISD::VP_FMINNUM:
8549 case ISD::VP_FMAXNUM:
8550 case ISD::VP_FCOPYSIGN:
8551 if (isPromotedOpNeedingSplit(Op, Subtarget))
8552 return SplitVPOp(Op, DAG);
8553 [[fallthrough]];
8554 case ISD::VP_SRA:
8555 case ISD::VP_SRL:
8556 case ISD::VP_SHL:
8557 return lowerVPOp(Op, DAG);
8558 case ISD::VP_IS_FPCLASS:
8559 return LowerIS_FPCLASS(Op, DAG);
8560 case ISD::VP_SIGN_EXTEND:
8561 case ISD::VP_ZERO_EXTEND:
8562 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8563 return lowerVPExtMaskOp(Op, DAG);
8564 return lowerVPOp(Op, DAG);
8565 case ISD::VP_TRUNCATE:
8566 return lowerVectorTruncLike(Op, DAG);
8567 case ISD::VP_FP_EXTEND:
8568 case ISD::VP_FP_ROUND:
8569 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8570 case ISD::VP_SINT_TO_FP:
8571 case ISD::VP_UINT_TO_FP:
8572 if (Op.getValueType().isVector() &&
8573 ((Op.getValueType().getScalarType() == MVT::f16 &&
8574 (Subtarget.hasVInstructionsF16Minimal() &&
8575 !Subtarget.hasVInstructionsF16())) ||
8576 Op.getValueType().getScalarType() == MVT::bf16)) {
8577 if (isPromotedOpNeedingSplit(Op, Subtarget))
8578 return SplitVectorOp(Op, DAG);
8579 // int -> f32
8580 SDLoc DL(Op);
8581 MVT NVT =
8582 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8583 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8584 // f32 -> [b]f16
8585 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8586 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8587 }
8588 [[fallthrough]];
8589 case ISD::VP_FP_TO_SINT:
8590 case ISD::VP_FP_TO_UINT:
8591 if (SDValue Op1 = Op.getOperand(0);
8592 Op1.getValueType().isVector() &&
8593 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8594 (Subtarget.hasVInstructionsF16Minimal() &&
8595 !Subtarget.hasVInstructionsF16())) ||
8596 Op1.getValueType().getScalarType() == MVT::bf16)) {
8597 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8598 return SplitVectorOp(Op, DAG);
8599 // [b]f16 -> f32
8600 SDLoc DL(Op);
8601 MVT NVT = MVT::getVectorVT(MVT::f32,
8602 Op1.getValueType().getVectorElementCount());
8603 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8604 // f32 -> int
8605 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
8606 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
8607 }
8608 return lowerVPFPIntConvOp(Op, DAG);
8609 case ISD::VP_SETCC:
8610 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8611 return SplitVPOp(Op, DAG);
8612 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8613 return lowerVPSetCCMaskOp(Op, DAG);
8614 [[fallthrough]];
8615 case ISD::VP_SMIN:
8616 case ISD::VP_SMAX:
8617 case ISD::VP_UMIN:
8618 case ISD::VP_UMAX:
8619 case ISD::VP_BITREVERSE:
8620 case ISD::VP_BSWAP:
8621 return lowerVPOp(Op, DAG);
8622 case ISD::VP_CTLZ:
8623 case ISD::VP_CTLZ_ZERO_UNDEF:
8624 if (Subtarget.hasStdExtZvbb())
8625 return lowerVPOp(Op, DAG);
8626 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8627 case ISD::VP_CTTZ:
8628 case ISD::VP_CTTZ_ZERO_UNDEF:
8629 if (Subtarget.hasStdExtZvbb())
8630 return lowerVPOp(Op, DAG);
8631 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8632 case ISD::VP_CTPOP:
8633 return lowerVPOp(Op, DAG);
8634 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8635 return lowerVPStridedLoad(Op, DAG);
8636 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8637 return lowerVPStridedStore(Op, DAG);
8638 case ISD::VP_FCEIL:
8639 case ISD::VP_FFLOOR:
8640 case ISD::VP_FRINT:
8641 case ISD::VP_FNEARBYINT:
8642 case ISD::VP_FROUND:
8643 case ISD::VP_FROUNDEVEN:
8644 case ISD::VP_FROUNDTOZERO:
8645 if (isPromotedOpNeedingSplit(Op, Subtarget))
8646 return SplitVPOp(Op, DAG);
8647 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8648 case ISD::VP_FMAXIMUM:
8649 case ISD::VP_FMINIMUM:
8650 if (isPromotedOpNeedingSplit(Op, Subtarget))
8651 return SplitVPOp(Op, DAG);
8652 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8653 case ISD::EXPERIMENTAL_VP_SPLICE:
8654 return lowerVPSpliceExperimental(Op, DAG);
8655 case ISD::EXPERIMENTAL_VP_REVERSE:
8656 return lowerVPReverseExperimental(Op, DAG);
8657 case ISD::EXPERIMENTAL_VP_SPLAT:
8658 return lowerVPSplatExperimental(Op, DAG);
8659 case ISD::CLEAR_CACHE: {
8660 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8661 "llvm.clear_cache only needs custom lower on Linux targets");
8662 SDLoc DL(Op);
8663 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8664 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
8665 Op.getOperand(2), Flags, DL);
8666 }
8667 case ISD::DYNAMIC_STACKALLOC:
8668 return lowerDYNAMIC_STACKALLOC(Op, DAG);
8669 case ISD::INIT_TRAMPOLINE:
8670 return lowerINIT_TRAMPOLINE(Op, DAG);
8671 case ISD::ADJUST_TRAMPOLINE:
8672 return lowerADJUST_TRAMPOLINE(Op, DAG);
8673 case ISD::PARTIAL_REDUCE_UMLA:
8674 case ISD::PARTIAL_REDUCE_SMLA:
8675 case ISD::PARTIAL_REDUCE_SUMLA:
8676 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
8677 }
8678}
8679
8680SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
8681 SDValue Start, SDValue End,
8682 SDValue Flags, SDLoc DL) const {
8683 MakeLibCallOptions CallOptions;
8684 std::pair<SDValue, SDValue> CallResult =
8685 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
8686 {Start, End, Flags}, CallOptions, DL, InChain);
8687
8688 // This function returns void so only the out chain matters.
8689 return CallResult.second;
8690}
8691
8692SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
8693 SelectionDAG &DAG) const {
8694 if (!Subtarget.is64Bit())
8695 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8696
8697 // Create an MCCodeEmitter to encode instructions.
8698 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
8699 assert(TLO);
8700 MCContext &MCCtx = TLO->getContext();
8701
8702 std::unique_ptr<MCCodeEmitter> CodeEmitter(
8703 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
8704
8705 SDValue Root = Op.getOperand(0);
8706 SDValue Trmp = Op.getOperand(1); // trampoline
8707 SDLoc dl(Op);
8708
8709 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
8710
8711 // We store in the trampoline buffer the following instructions and data.
8712 // Offset:
8713 // 0: auipc t2, 0
8714 // 4: ld t0, 24(t2)
8715 // 8: ld t2, 16(t2)
8716 // 12: jalr t0
8717 // 16: <StaticChainOffset>
8718 // 24: <FunctionAddressOffset>
8719 // 32:
8720 // Offset with branch control flow protection enabled:
8721 // 0: lpad <imm20>
8722 // 4: auipc t3, 0
8723 // 8: ld t2, 28(t3)
8724 // 12: ld t3, 20(t3)
8725 // 16: jalr t2
8726 // 20: <StaticChainOffset>
8727 // 28: <FunctionAddressOffset>
8728 // 36:
8729
8730 const bool HasCFBranch =
8731 Subtarget.hasStdExtZicfilp() &&
8733 "cf-protection-branch");
8734 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
8735 const unsigned StaticChainOffset = StaticChainIdx * 4;
8736 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
8737
8738 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
8739 assert(STI);
8740 auto GetEncoding = [&](const MCInst &MC) {
8741 SmallVector<char, 32> CB;
8742 SmallVector<MCFixup> Fixups;
8743 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
8744 uint32_t Encoding = support::endian::read32le(CB.data());
8745 return Encoding;
8746 };
8747
8748 SmallVector<SDValue> OutChains;
8749
8750 SmallVector<uint32_t> Encodings;
8751 if (!HasCFBranch) {
8752 Encodings.append(
8753 {// auipc t2, 0
8754 // Loads the current PC into t2.
8755 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
8756 // ld t0, 24(t2)
8757 // Loads the function address into t0. Note that we are using offsets
8758 // pc-relative to the first instruction of the trampoline.
8759 GetEncoding(MCInstBuilder(RISCV::LD)
8760 .addReg(RISCV::X5)
8761 .addReg(RISCV::X7)
8762 .addImm(FunctionAddressOffset)),
8763 // ld t2, 16(t2)
8764 // Load the value of the static chain.
8765 GetEncoding(MCInstBuilder(RISCV::LD)
8766 .addReg(RISCV::X7)
8767 .addReg(RISCV::X7)
8768 .addImm(StaticChainOffset)),
8769 // jalr t0
8770 // Jump to the function.
8771 GetEncoding(MCInstBuilder(RISCV::JALR)
8772 .addReg(RISCV::X0)
8773 .addReg(RISCV::X5)
8774 .addImm(0))});
8775 } else {
8776 Encodings.append(
8777 {// auipc x0, <imm20> (lpad <imm20>)
8778 // Landing pad.
8779 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
8780 // auipc t3, 0
8781 // Loads the current PC into t3.
8782 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
8783 // ld t2, (FunctionAddressOffset - 4)(t3)
8784 // Loads the function address into t2. Note that we are using offsets
8785 // pc-relative to the SECOND instruction of the trampoline.
8786 GetEncoding(MCInstBuilder(RISCV::LD)
8787 .addReg(RISCV::X7)
8788 .addReg(RISCV::X28)
8789 .addImm(FunctionAddressOffset - 4)),
8790 // ld t3, (StaticChainOffset - 4)(t3)
8791 // Load the value of the static chain.
8792 GetEncoding(MCInstBuilder(RISCV::LD)
8793 .addReg(RISCV::X28)
8794 .addReg(RISCV::X28)
8795 .addImm(StaticChainOffset - 4)),
8796 // jalr t2
8797 // Software-guarded jump to the function.
8798 GetEncoding(MCInstBuilder(RISCV::JALR)
8799 .addReg(RISCV::X0)
8800 .addReg(RISCV::X7)
8801 .addImm(0))});
8802 }
8803
8804 // Store encoded instructions.
8805 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
8806 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8807 DAG.getConstant(Idx * 4, dl, MVT::i64))
8808 : Trmp;
8809 OutChains.push_back(DAG.getTruncStore(
8810 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
8811 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
8812 }
8813
8814 // Now store the variable part of the trampoline.
8815 SDValue FunctionAddress = Op.getOperand(2);
8816 SDValue StaticChain = Op.getOperand(3);
8817
8818 // Store the given static chain and function pointer in the trampoline buffer.
8819 struct OffsetValuePair {
8820 const unsigned Offset;
8821 const SDValue Value;
8822 SDValue Addr = SDValue(); // Used to cache the address.
8823 } OffsetValues[] = {
8824 {StaticChainOffset, StaticChain},
8825 {FunctionAddressOffset, FunctionAddress},
8826 };
8827 for (auto &OffsetValue : OffsetValues) {
8828 SDValue Addr =
8829 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
8830 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
8831 OffsetValue.Addr = Addr;
8832 OutChains.push_back(
8833 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
8834 MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
8835 }
8836
8837 assert(OutChains.size() == StaticChainIdx + 2 &&
8838 "Size of OutChains mismatch");
8839 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
8840
8841 // The end of the trampoline's instructions is the same as the static chain
8842 // address that we computed earlier.
8843 SDValue EndOfTrmp = OffsetValues[0].Addr;
8844
8845 // Call clear cache on the trampoline instructions.
8846 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
8847 Trmp, EndOfTrmp);
8848
8849 return Chain;
8850}
8851
8852SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
8853 SelectionDAG &DAG) const {
8854 if (!Subtarget.is64Bit())
8855 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8856
8857 return Op.getOperand(0);
8858}
8859
8860SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
8861 SelectionDAG &DAG) const {
8862 // Currently, only the vqdot and vqdotu case (from zvqdotq) should be legal.
8863 // TODO: There are many other sub-cases we could potentially lower, are
8864 // any of them worthwhile? Ex: via vredsum, vwredsum, vwwmaccu, etc..
8865 SDLoc DL(Op);
8866 MVT VT = Op.getSimpleValueType();
8867 SDValue Accum = Op.getOperand(0);
8868 assert(Accum.getSimpleValueType() == VT &&
8869 VT.getVectorElementType() == MVT::i32);
8870 SDValue A = Op.getOperand(1);
8871 SDValue B = Op.getOperand(2);
8872 MVT ArgVT = A.getSimpleValueType();
8873 assert(ArgVT == B.getSimpleValueType() &&
8874 ArgVT.getVectorElementType() == MVT::i8);
8875 (void)ArgVT;
8876
8877 // The zvqdotq pseudos are defined with sources and destination both
8878 // being i32. This cast is needed for correctness to avoid incorrect
8879 // .vx matching of i8 splats.
8880 A = DAG.getBitcast(VT, A);
8881 B = DAG.getBitcast(VT, B);
8882
8883 MVT ContainerVT = VT;
8884 if (VT.isFixedLengthVector()) {
8885 ContainerVT = getContainerForFixedLengthVector(VT);
8886 Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
8887 A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
8888 B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
8889 }
8890
8891 unsigned Opc;
8892 switch (Op.getOpcode()) {
8893 case ISD::PARTIAL_REDUCE_SMLA:
8894 Opc = RISCVISD::VQDOT_VL;
8895 break;
8896 case ISD::PARTIAL_REDUCE_UMLA:
8897 Opc = RISCVISD::VQDOTU_VL;
8898 break;
8899 case ISD::PARTIAL_REDUCE_SUMLA:
8900 Opc = RISCVISD::VQDOTSU_VL;
8901 break;
8902 default:
8903 llvm_unreachable("Unexpected opcode");
8904 }
8905 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8906 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
8907 if (VT.isFixedLengthVector())
8908 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
8909 return Res;
8910}
8911
8912 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
8913 SelectionDAG &DAG, unsigned Flags) {
8914 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
8915}
8916
8917 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
8918 SelectionDAG &DAG, unsigned Flags) {
8919 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
8920 Flags);
8921}
8922
8923 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
8924 SelectionDAG &DAG, unsigned Flags) {
8925 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
8926 N->getOffset(), Flags);
8927}
8928
8929 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
8930 SelectionDAG &DAG, unsigned Flags) {
8931 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
8932}
8933
8934 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
8935 EVT Ty, SelectionDAG &DAG) {
8936 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
8937 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8938 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8939 return DAG.getLoad(
8940 Ty, DL, DAG.getEntryNode(), LC,
8941 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8942 }
8943
8944 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
8945 EVT Ty, SelectionDAG &DAG) {
8946 RISCVConstantPoolValue *CPV =
8947 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
8948 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
8949 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
8950 return DAG.getLoad(
8951 Ty, DL, DAG.getEntryNode(), LC,
8952 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
8953 }
8954
8955template <class NodeTy>
8956SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
8957 bool IsLocal, bool IsExternWeak) const {
8958 SDLoc DL(N);
8959 EVT Ty = getPointerTy(DAG.getDataLayout());
8960
8961 // When HWASAN is used and tagging of global variables is enabled
8962 // they should be accessed via the GOT, since the tagged address of a global
8963 // is incompatible with existing code models. This also applies to non-pic
8964 // mode.
8965 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
8966 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8967 if (IsLocal && !Subtarget.allowTaggedGlobals())
8968 // Use PC-relative addressing to access the symbol. This generates the
8969 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
8970 // %pcrel_lo(auipc)).
8971 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8972
8973 // Use PC-relative addressing to access the GOT for this symbol, then load
8974 // the address from the GOT. This generates the pattern (PseudoLGA sym),
8975 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
8976 SDValue Load =
8977 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
8978 MachineFunction &MF = DAG.getMachineFunction();
8979 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8980 MachinePointerInfo::getGOT(MF),
8981 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8982 MachineMemOperand::MOInvariant,
8983 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8984 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8985 return Load;
8986 }
8987
8988 switch (getTargetMachine().getCodeModel()) {
8989 default:
8990 reportFatalUsageError("Unsupported code model for lowering");
8991 case CodeModel::Small: {
8992 // Generate a sequence for accessing addresses within the first 2 GiB of
8993 // address space.
8994 if (Subtarget.hasVendorXqcili()) {
8995 // Use QC.E.LI to generate the address, as this is easier to relax than
8996 // LUI/ADDI.
8997 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8998 return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
8999 }
9000
9001 // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
9002 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
9003 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
9004 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9005 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
9006 }
9007 case CodeModel::Medium: {
9008 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9009 if (IsExternWeak) {
9010 // An extern weak symbol may be undefined, i.e. have value 0, which may
9011 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
9012 // symbol. This generates the pattern (PseudoLGA sym), which expands to
9013 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
9014 SDValue Load =
9015 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
9016 MachineFunction &MF = DAG.getMachineFunction();
9017 MachineMemOperand *MemOp = MF.getMachineMemOperand(
9018 MachinePointerInfo::getGOT(MF),
9019 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
9020 MachineMemOperand::MOInvariant,
9021 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9022 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9023 return Load;
9024 }
9025
9026 // Generate a sequence for accessing addresses within any 2GiB range within
9027 // the address space. This generates the pattern (PseudoLLA sym), which
9028 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
9029 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9030 }
9031 case CodeModel::Large: {
9032 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
9033 return getLargeGlobalAddress(G, DL, Ty, DAG);
9034
9035 // Use PC-relative addressing for other node types.
9036 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9037 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9038 }
9039 }
9040}
9041
9042SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
9043 SelectionDAG &DAG) const {
9044 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9045 assert(N->getOffset() == 0 && "unexpected offset in global node");
9046 const GlobalValue *GV = N->getGlobal();
9047 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
9048}
9049
9050SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
9051 SelectionDAG &DAG) const {
9052 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
9053
9054 return getAddr(N, DAG);
9055}
9056
9057SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
9058 SelectionDAG &DAG) const {
9059 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
9060
9061 return getAddr(N, DAG);
9062}
9063
9064SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
9065 SelectionDAG &DAG) const {
9066 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
9067
9068 return getAddr(N, DAG);
9069}
9070
9071SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
9072 SelectionDAG &DAG,
9073 bool UseGOT) const {
9074 SDLoc DL(N);
9075 EVT Ty = getPointerTy(DAG.getDataLayout());
9076 const GlobalValue *GV = N->getGlobal();
9077 MVT XLenVT = Subtarget.getXLenVT();
9078
9079 if (UseGOT) {
9080 // Use PC-relative addressing to access the GOT for this TLS symbol, then
9081 // load the address from the GOT and add the thread pointer. This generates
9082 // the pattern (PseudoLA_TLS_IE sym), which expands to
9083 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
9084 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9085 SDValue Load =
9086 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
9087 MachineFunction &MF = DAG.getMachineFunction();
9088 MachineMemOperand *MemOp = MF.getMachineMemOperand(
9089 MachinePointerInfo::getGOT(MF),
9090 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
9091 MachineMemOperand::MOInvariant,
9092 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9093 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9094
9095 // Add the thread pointer.
9096 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9097 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
9098 }
9099
9100 // Generate a sequence for accessing the address relative to the thread
9101 // pointer, with the appropriate adjustment for the thread pointer offset.
9102 // This generates the pattern
9103 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
9104 SDValue AddrHi =
9105 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
9106 SDValue AddrAdd =
9107 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
9108 SDValue AddrLo =
9109 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
9110
9111 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9112 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9113 SDValue MNAdd =
9114 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9115 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9116}
9117
9118SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9119 SelectionDAG &DAG) const {
9120 SDLoc DL(N);
9121 EVT Ty = getPointerTy(DAG.getDataLayout());
9122 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9123 const GlobalValue *GV = N->getGlobal();
9124
9125 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9126 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9127 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
9128 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9129 SDValue Load =
9130 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9131
9132 // Prepare argument list to generate call.
9133 ArgListTy Args;
9134 Args.emplace_back(Load, CallTy);
9135
9136 // Setup call to __tls_get_addr.
9137 TargetLowering::CallLoweringInfo CLI(DAG);
9138 CLI.setDebugLoc(DL)
9139 .setChain(DAG.getEntryNode())
9140 .setLibCallee(CallingConv::C, CallTy,
9141 DAG.getExternalSymbol("__tls_get_addr", Ty),
9142 std::move(Args));
9143
9144 return LowerCallTo(CLI).first;
9145}
9146
9147SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9148 SelectionDAG &DAG) const {
9149 SDLoc DL(N);
9150 EVT Ty = getPointerTy(DAG.getDataLayout());
9151 const GlobalValue *GV = N->getGlobal();
9152
9153 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9154 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9155 //
9156 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9157 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9158 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9159 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9160 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9161 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9162}
9163
9164SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9165 SelectionDAG &DAG) const {
9166 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9167 assert(N->getOffset() == 0 && "unexpected offset in global node");
9168
9169 if (DAG.getTarget().useEmulatedTLS())
9170 return LowerToTLSEmulatedModel(N, DAG);
9171
9172 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
9173
9174 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
9175 CallingConv::GHC)
9176 reportFatalUsageError("In GHC calling convention TLS is not supported");
9177
9178 SDValue Addr;
9179 switch (Model) {
9180 case TLSModel::LocalExec:
9181 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9182 break;
9183 case TLSModel::InitialExec:
9184 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9185 break;
9186 case TLSModel::LocalDynamic:
9187 case TLSModel::GeneralDynamic:
9188 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9189 : getDynamicTLSAddr(N, DAG);
9190 break;
9191 }
9192
9193 return Addr;
9194}
9195
9196// Return true if Val is equal to (setcc LHS, RHS, CC).
9197// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9198// Otherwise, return std::nullopt.
9199static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9200 ISD::CondCode CC, SDValue Val) {
9201 assert(Val->getOpcode() == ISD::SETCC);
9202 SDValue LHS2 = Val.getOperand(0);
9203 SDValue RHS2 = Val.getOperand(1);
9204 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9205
9206 if (LHS == LHS2 && RHS == RHS2) {
9207 if (CC == CC2)
9208 return true;
9209 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9210 return false;
9211 } else if (LHS == RHS2 && RHS == LHS2) {
9212 CC2 = ISD::getSetCCSwappedOperands(CC2);
9213 if (CC == CC2)
9214 return true;
9215 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9216 return false;
9217 }
9218
9219 return std::nullopt;
9220}
9221
9222 static bool isSimm12Constant(SDValue V) {
9223 return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
9224}
9225
9226 static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
9227 const RISCVSubtarget &Subtarget) {
9228 SDValue CondV = N->getOperand(0);
9229 SDValue TrueV = N->getOperand(1);
9230 SDValue FalseV = N->getOperand(2);
9231 MVT VT = N->getSimpleValueType(0);
9232 SDLoc DL(N);
9233
9234 if (!Subtarget.hasConditionalMoveFusion()) {
9235 // (select c, -1, y) -> -c | y
9236 if (isAllOnesConstant(TrueV)) {
9237 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9238 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9239 }
9240 // (select c, y, -1) -> (c-1) | y
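// (When c is 1, c-1 is 0 and the OR yields y; when c is 0, c-1 is all-ones.)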
9241 if (isAllOnesConstant(FalseV)) {
9242 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9243 DAG.getAllOnesConstant(DL, VT));
9244 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9245 }
9246
9247 const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
9248
9249 // (select c, 0, y) -> (c-1) & y
9250 if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
9251 SDValue Neg =
9252 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
9253 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9254 }
9255 if (isNullConstant(FalseV)) {
9256 // (select c, (1 << ShAmount) + 1, 0) -> (c << ShAmount) + c
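// For example, (select c, 9, 0) becomes (c << 3) + c when a shift-add is available.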
9257 if (auto *TrueC = dyn_cast<ConstantSDNode>(TrueV)) {
9258 uint64_t TrueM1 = TrueC->getZExtValue() - 1;
9259 if (isPowerOf2_64(TrueM1)) {
9260 unsigned ShAmount = Log2_64(TrueM1);
9261 if (Subtarget.hasShlAdd(ShAmount))
9262 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, CondV,
9263 DAG.getTargetConstant(ShAmount, DL, VT), CondV);
9264 }
9265 }
9266 // (select c, y, 0) -> -c & y
9267 if (!HasCZero || isSimm12Constant(TrueV)) {
9268 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9269 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9270 }
9271 }
9272 }
9273
9274 // select c, ~x, x --> xor -c, x
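// (-c is all-ones when c is 1, so the XOR flips x; it is 0 when c is 0, leaving x.)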
9275 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9276 const APInt &TrueVal = TrueV->getAsAPIntVal();
9277 const APInt &FalseVal = FalseV->getAsAPIntVal();
9278 if (~TrueVal == FalseVal) {
9279 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9280 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9281 }
9282 }
9283
9284 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9285 // when both truev and falsev are also setcc.
9286 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9287 FalseV.getOpcode() == ISD::SETCC) {
9288 SDValue LHS = CondV.getOperand(0);
9289 SDValue RHS = CondV.getOperand(1);
9290 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9291
9292 // (select x, x, y) -> x | y
9293 // (select !x, x, y) -> x & y
9294 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9295 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9296 DAG.getFreeze(FalseV));
9297 }
9298 // (select x, y, x) -> x & y
9299 // (select !x, y, x) -> x | y
9300 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9301 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9302 DAG.getFreeze(TrueV), FalseV);
9303 }
9304 }
9305
9306 return SDValue();
9307}
9308
9309// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9310// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9311 // For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
9312// being `0` or `-1`. In such cases we can replace `select` with `and`.
9313// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9314// than `c0`?
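// For example, `and (select cond, x, 7), 8` folds to `select cond, (and x, 8), 0`,
// and that select can then be replaced with an AND on the condition mask.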
9315static SDValue
9316 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
9317 const RISCVSubtarget &Subtarget) {
9318 if (Subtarget.hasShortForwardBranchOpt())
9319 return SDValue();
9320
9321 unsigned SelOpNo = 0;
9322 SDValue Sel = BO->getOperand(0);
9323 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9324 SelOpNo = 1;
9325 Sel = BO->getOperand(1);
9326 }
9327
9328 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9329 return SDValue();
9330
9331 unsigned ConstSelOpNo = 1;
9332 unsigned OtherSelOpNo = 2;
9333 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9334 ConstSelOpNo = 2;
9335 OtherSelOpNo = 1;
9336 }
9337 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9338 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9339 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9340 return SDValue();
9341
9342 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9343 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9344 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9345 return SDValue();
9346
9347 SDLoc DL(Sel);
9348 EVT VT = BO->getValueType(0);
9349
9350 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9351 if (SelOpNo == 1)
9352 std::swap(NewConstOps[0], NewConstOps[1]);
9353
9354 SDValue NewConstOp =
9355 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9356 if (!NewConstOp)
9357 return SDValue();
9358
9359 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9360 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9361 return SDValue();
9362
9363 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9364 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9365 if (SelOpNo == 1)
9366 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9367 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9368
9369 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9370 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9371 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9372}
9373
9374SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9375 SDValue CondV = Op.getOperand(0);
9376 SDValue TrueV = Op.getOperand(1);
9377 SDValue FalseV = Op.getOperand(2);
9378 SDLoc DL(Op);
9379 MVT VT = Op.getSimpleValueType();
9380 MVT XLenVT = Subtarget.getXLenVT();
9381
9382 // Lower vector SELECTs to VSELECTs by splatting the condition.
9383 if (VT.isVector()) {
9384 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9385 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9386 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9387 }
9388
9389 // Try some other optimizations before falling back to generic lowering.
9390 if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
9391 return V;
9392
9393 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9394 // nodes to implement the SELECT. Performing the lowering here allows for
9395 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9396 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9397 if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
9398
9399 // (select c, t, 0) -> (czero_eqz t, c)
9400 if (isNullConstant(FalseV))
9401 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9402 // (select c, 0, f) -> (czero_nez f, c)
9403 if (isNullConstant(TrueV))
9404 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9405
9406 // Check to see if a given operation is a 'NOT'; if so, return the negated
9407 // operand.
9408 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9409 using namespace llvm::SDPatternMatch;
9410 SDValue Xor;
9411 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9412 return Xor;
9413 }
9414 return std::nullopt;
9415 };
9416 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9417 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9418 if (TrueV.getOpcode() == ISD::AND &&
9419 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9420 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9421 ? getNotOperand(TrueV.getOperand(1))
9422 : getNotOperand(TrueV.getOperand(0));
9423 if (NotOperand) {
9424 SDValue CMOV =
9425 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9426 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9427 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
9428 }
9429 return DAG.getNode(
9430 ISD::OR, DL, VT, TrueV,
9431 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9432 }
9433
9434 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9435 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
9436 if (FalseV.getOpcode() == ISD::AND &&
9437 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
9438 auto NotOperand = (FalseV.getOperand(0) == TrueV)
9439 ? getNotOperand(FalseV.getOperand(1))
9440 : getNotOperand(FalseV.getOperand(0));
9441 if (NotOperand) {
9442 SDValue CMOV =
9443 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
9444 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9445 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
9446 }
9447 return DAG.getNode(
9448 ISD::OR, DL, VT, FalseV,
9449 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
9450 }
9451
9452 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9453 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
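// For example, with c1 = 5 and c2 = 9 the first form is (add (czero_nez 4, c), 5):
// czero_nez yields 0 when c is nonzero (giving 5) and 4 when c is zero (giving 9).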
9454 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9455 const APInt &TrueVal = TrueV->getAsAPIntVal();
9456 const APInt &FalseVal = FalseV->getAsAPIntVal();
9457
9458 // Prefer these over Zicond to avoid materializing an immediate:
9459 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9460 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
9461 if (CondV.getOpcode() == ISD::SETCC &&
9462 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
9463 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9464 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
9465 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
9466 int64_t TrueImm = TrueVal.getSExtValue();
9467 int64_t FalseImm = FalseVal.getSExtValue();
9468 if (CCVal == ISD::SETGT)
9469 std::swap(TrueImm, FalseImm);
9470 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
9471 isInt<12>(TrueImm - FalseImm)) {
9472 SDValue SRA =
9473 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
9474 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
9475 SDValue AND =
9476 DAG.getNode(ISD::AND, DL, VT, SRA,
9477 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
9478 return DAG.getNode(ISD::ADD, DL, VT, AND,
9479 DAG.getSignedConstant(FalseImm, DL, VT));
9480 }
9481 }
9482 }
9483
9484 // Use SHL/ADDI (and possibly XORI) to avoid having to materialize
9485 // a constant in a register.
9486 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
9487 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
9488 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9489 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
9490 }
9491 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9492 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9493 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9494 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9495 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9496 }
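// As an illustration of the two power-of-2 cases above: (select c, 9, 1)
// matches the first form (9 - 1 = 8 is a power of two and 1 fits in a
// simm12), so it lowers to (add 1, (shl c, 3)), i.e. a single slli+addi
// pair instead of materializing a constant or branching.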
9497
9498 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
9499 const int DeltaCost = RISCVMatInt::getIntMatCost(
9500 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9501 // Does the addend fold into an ADDI
9502 if (Addend.isSignedIntN(12))
9503 return DeltaCost;
9504 const int AddendCost = RISCVMatInt::getIntMatCost(
9505 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9506 return AddendCost + DeltaCost;
9507 };
9508 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
9509 getCost(TrueVal - FalseVal, FalseVal);
9510 SDValue LHSVal = DAG.getConstant(
9511 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9512 SDValue CMOV =
9513 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9514 DL, VT, LHSVal, CondV);
9515 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
9516 }
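// Example for the constant/constant case above: (select c, 100, 37) can be
// emitted as (add (czero_nez -63, c), 100); a non-zero condition zeroes the
// delta and yields 100, while a zero condition yields -63 + 100 = 37. The
// cost comparison above picks whichever of the two symmetric forms is
// cheaper to materialize.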
9517
9518 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9519 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
9520 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
9521 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
9522 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9523 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9524 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
9525 // Fall back to XORI if Const == -0x800
9526 if (RawConstVal == -0x800) {
9527 SDValue XorOp = DAG.getNode(ISD::XOR, DL, VT, RegV, ConstVal);
9528 SDValue CMOV =
9529 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9530 DL, VT, XorOp, CondV);
9531 return DAG.getNode(ISD::XOR, DL, VT, CMOV, ConstVal);
9532 }
9533 // Efficient only if the constant and its negation fit into `ADDI`.
9534 // Prefer Add/Sub over Xor since it can be compressed for small immediates.
9535 if (isInt<12>(RawConstVal)) {
9536 SDValue SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
9537 SDValue CMOV =
9538 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9539 DL, VT, SubOp, CondV);
9540 return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
9541 }
9542 }
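// Example for the constant/register case above: (select c, 5, t) takes the
// ADDI path and becomes (add (czero_nez (sub t, 5), c), 5); a non-zero
// condition gives 0 + 5 = 5, a zero condition gives (t - 5) + 5 = t.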
9543
9544 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9545 // Unless we have the short forward branch optimization.
9546 if (!Subtarget.hasConditionalMoveFusion())
9547 return DAG.getNode(
9548 ISD::OR, DL, VT,
9549 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
9550 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
9551 SDNodeFlags::Disjoint);
9552 }
9553
9554 if (Op.hasOneUse()) {
9555 unsigned UseOpc = Op->user_begin()->getOpcode();
9556 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
9557 SDNode *BinOp = *Op->user_begin();
9558 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
9559 DAG, Subtarget)) {
9560 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
9561 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9562 // may return a constant node and cause crash in lowerSELECT.
9563 if (NewSel.getOpcode() == ISD::SELECT)
9564 return lowerSELECT(NewSel, DAG);
9565 return NewSel;
9566 }
9567 }
9568 }
9569
9570 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9571 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9572 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
9573 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
9574 if (FPTV && FPFV) {
9575 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
9576 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
9577 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
9578 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
9579 DAG.getConstant(1, DL, XLenVT));
9580 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
9581 }
9582 }
9583
9584 // If the condition is not an integer SETCC which operates on XLenVT, we need
9585 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
9586 // (select condv, truev, falsev)
9587 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9588 if (CondV.getOpcode() != ISD::SETCC ||
9589 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
9590 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9591 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
9592
9593 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9594
9595 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9596 }
9597
9598 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9599 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9600 // advantage of the integer compare+branch instructions. i.e.:
9601 // (select (setcc lhs, rhs, cc), truev, falsev)
9602 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9603 SDValue LHS = CondV.getOperand(0);
9604 SDValue RHS = CondV.getOperand(1);
9605 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9606
9607 // Special case for a select of 2 constants that have a difference of 1.
9608 // Normally this is done by DAGCombine, but if the select is introduced by
9609 // type legalization or op legalization, we miss it. Restricting to SETLT
9610 // case for now because that is what signed saturating add/sub need.
9611 // FIXME: We don't need the condition to be SETLT or even a SETCC,
9612 // but we would probably want to swap the true/false values if the condition
9613 // is SETGE/SETLE to avoid an XORI.
9614 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
9615 CCVal == ISD::SETLT) {
9616 const APInt &TrueVal = TrueV->getAsAPIntVal();
9617 const APInt &FalseVal = FalseV->getAsAPIntVal();
9618 if (TrueVal - 1 == FalseVal)
9619 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
9620 if (TrueVal + 1 == FalseVal)
9621 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
9622 }
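// For example, (select (setlt x, y), 6, 5) becomes (add (setlt x, y), 5)
// since the setcc result is 0 or 1, and (select (setlt x, y), 5, 6)
// becomes (sub 6, (setlt x, y)).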
9623
9624 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9625 // 1 < x ? x : 1 -> 0 < x ? x : 1
9626 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
9627 RHS == TrueV && LHS == FalseV) {
9628 LHS = DAG.getConstant(0, DL, VT);
9629 // 0 <u x is the same as x != 0.
9630 if (CCVal == ISD::SETULT) {
9631 std::swap(LHS, RHS);
9632 CCVal = ISD::SETNE;
9633 }
9634 }
9635
9636 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
9637 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
9638 RHS == FalseV) {
9639 RHS = DAG.getConstant(0, DL, VT);
9640 }
9641
9642 SDValue TargetCC = DAG.getCondCode(CCVal);
9643
9644 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
9645 // (select (setcc lhs, rhs, CC), constant, falsev)
9646 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
9647 std::swap(TrueV, FalseV);
9648 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
9649 }
9650
9651 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
9652 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9653}
9654
9655SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
9656 SDValue CondV = Op.getOperand(1);
9657 SDLoc DL(Op);
9658 MVT XLenVT = Subtarget.getXLenVT();
9659
9660 if (CondV.getOpcode() == ISD::SETCC &&
9661 CondV.getOperand(0).getValueType() == XLenVT) {
9662 SDValue LHS = CondV.getOperand(0);
9663 SDValue RHS = CondV.getOperand(1);
9664 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9665
9666 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9667
9668 SDValue TargetCC = DAG.getCondCode(CCVal);
9669 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9670 LHS, RHS, TargetCC, Op.getOperand(2));
9671 }
9672
9673 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9674 CondV, DAG.getConstant(0, DL, XLenVT),
9675 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
9676}
9677
9678SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
9679 MachineFunction &MF = DAG.getMachineFunction();
9680 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
9681
9682 SDLoc DL(Op);
9683 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
9684 getPointerTy(MF.getDataLayout()));
9685
9686 // vastart just stores the address of the VarArgsFrameIndex slot into the
9687 // memory location argument.
9688 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
9689 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
9690 MachinePointerInfo(SV));
9691}
9692
9693SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
9694 SelectionDAG &DAG) const {
9695 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9696 MachineFunction &MF = DAG.getMachineFunction();
9697 MachineFrameInfo &MFI = MF.getFrameInfo();
9698 MFI.setFrameAddressIsTaken(true);
9699 Register FrameReg = RI.getFrameRegister(MF);
9700 int XLenInBytes = Subtarget.getXLen() / 8;
9701
9702 EVT VT = Op.getValueType();
9703 SDLoc DL(Op);
9704 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
9705 unsigned Depth = Op.getConstantOperandVal(0);
9706 while (Depth--) {
9707 int Offset = -(XLenInBytes * 2);
9708 SDValue Ptr = DAG.getNode(
9709 ISD::ADD, DL, VT, FrameAddr,
9710 DAG.getSignedConstant(Offset, DL, getPointerTy(DAG.getDataLayout())));
9711 FrameAddr =
9712 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
9713 }
9714 return FrameAddr;
9715}
9716
9717SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
9718 SelectionDAG &DAG) const {
9719 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
9720 MachineFunction &MF = DAG.getMachineFunction();
9721 MachineFrameInfo &MFI = MF.getFrameInfo();
9722 MFI.setReturnAddressIsTaken(true);
9723 MVT XLenVT = Subtarget.getXLenVT();
9724 int XLenInBytes = Subtarget.getXLen() / 8;
9725
9726 EVT VT = Op.getValueType();
9727 SDLoc DL(Op);
9728 unsigned Depth = Op.getConstantOperandVal(0);
9729 if (Depth) {
9730 int Off = -XLenInBytes;
9731 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
9732 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
9733 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
9734 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
9735 MachinePointerInfo());
9736 }
9737
9738 // Return the value of the return address register, marking it an implicit
9739 // live-in.
9740 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
9741 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
9742}
9743
9744SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
9745 SelectionDAG &DAG) const {
9746 SDLoc DL(Op);
9747 SDValue Lo = Op.getOperand(0);
9748 SDValue Hi = Op.getOperand(1);
9749 SDValue Shamt = Op.getOperand(2);
9750 EVT VT = Lo.getValueType();
9751
9752 // if Shamt-XLEN < 0: // Shamt < XLEN
9753 // Lo = Lo << Shamt
9754 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
9755 // else:
9756 // Lo = 0
9757 // Hi = Lo << (Shamt-XLEN)
9758
9759 SDValue Zero = DAG.getConstant(0, DL, VT);
9760 SDValue One = DAG.getConstant(1, DL, VT);
9761 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9762 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9763 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9764 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9765
9766 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
9767 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
9768 SDValue ShiftRightLo =
9769 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
9770 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
9771 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
9772 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
9773
9774 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9775
9776 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
9777 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9778
9779 SDValue Parts[2] = {Lo, Hi};
9780 return DAG.getMergeValues(Parts, DL);
9781}
9782
9783SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
9784 bool IsSRA) const {
9785 SDLoc DL(Op);
9786 SDValue Lo = Op.getOperand(0);
9787 SDValue Hi = Op.getOperand(1);
9788 SDValue Shamt = Op.getOperand(2);
9789 EVT VT = Lo.getValueType();
9790
9791 // SRA expansion:
9792 // if Shamt-XLEN < 0: // Shamt < XLEN
9793 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9794 // Hi = Hi >>s Shamt
9795 // else:
9796 // Lo = Hi >>s (Shamt-XLEN);
9797 // Hi = Hi >>s (XLEN-1)
9798 //
9799 // SRL expansion:
9800 // if Shamt-XLEN < 0: // Shamt < XLEN
9801 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
9802 // Hi = Hi >>u Shamt
9803 // else:
9804 // Lo = Hi >>u (Shamt-XLEN);
9805 // Hi = 0;
9806
9807 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
9808
9809 SDValue Zero = DAG.getConstant(0, DL, VT);
9810 SDValue One = DAG.getConstant(1, DL, VT);
9811 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
9812 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
9813 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
9814 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
9815
9816 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
9817 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
9818 SDValue ShiftLeftHi =
9819 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
9820 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
9821 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
9822 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
9823 SDValue HiFalse =
9824 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
9825
9826 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
9827
9828 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
9829 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
9830
9831 SDValue Parts[2] = {Lo, Hi};
9832 return DAG.getMergeValues(Parts, DL);
9833}
9834
9835// Lower splats of i1 types to SETCC. For each mask vector type, we have a
9836// legal equivalently-sized i8 type, so we can use that as a go-between.
9837SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
9838 SelectionDAG &DAG) const {
9839 SDLoc DL(Op);
9840 MVT VT = Op.getSimpleValueType();
9841 SDValue SplatVal = Op.getOperand(0);
9842 // All-zeros or all-ones splats are handled specially.
9843 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
9844 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9845 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
9846 }
9847 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
9848 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
9849 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
9850 }
9851 MVT InterVT = VT.changeVectorElementType(MVT::i8);
9852 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
9853 DAG.getConstant(1, DL, SplatVal.getValueType()));
9854 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
9855 SDValue Zero = DAG.getConstant(0, DL, InterVT);
9856 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
9857}
9858
9859// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
9860// illegal (currently only vXi64 RV32).
9861// FIXME: We could also catch non-constant sign-extended i32 values and lower
9862// them to VMV_V_X_VL.
9863SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
9864 SelectionDAG &DAG) const {
9865 SDLoc DL(Op);
9866 MVT VecVT = Op.getSimpleValueType();
9867 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
9868 "Unexpected SPLAT_VECTOR_PARTS lowering");
9869
9870 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
9871 SDValue Lo = Op.getOperand(0);
9872 SDValue Hi = Op.getOperand(1);
9873
9874 MVT ContainerVT = VecVT;
9875 if (VecVT.isFixedLengthVector())
9876 ContainerVT = getContainerForFixedLengthVector(VecVT);
9877
9878 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9879
9880 SDValue Res =
9881 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
9882
9883 if (VecVT.isFixedLengthVector())
9884 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
9885
9886 return Res;
9887}
9888
9889// Custom-lower extensions from mask vectors by using a vselect either with 1
9890// for zero/any-extension or -1 for sign-extension:
9891// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
9892// Note that any-extension is lowered identically to zero-extension.
9893SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
9894 int64_t ExtTrueVal) const {
9895 SDLoc DL(Op);
9896 MVT VecVT = Op.getSimpleValueType();
9897 SDValue Src = Op.getOperand(0);
9898 // Only custom-lower extensions from mask types
9899 assert(Src.getValueType().isVector() &&
9900 Src.getValueType().getVectorElementType() == MVT::i1);
9901
9902 if (VecVT.isScalableVector()) {
9903 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
9904 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
9905 if (Src.getOpcode() == ISD::XOR &&
9906 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
9907 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
9908 SplatTrueVal);
9909 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
9910 }
9911
9912 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
9913 MVT I1ContainerVT =
9914 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
9915
9916 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
9917
9918 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
9919
9920 MVT XLenVT = Subtarget.getXLenVT();
9921 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
9922 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
9923
9924 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9925 SDValue Xor = Src.getOperand(0);
9926 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
9927 SDValue ScalableOnes = Xor.getOperand(1);
9928 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
9929 ScalableOnes.getOperand(0).isUndef() &&
9930 ISD::isConstantSplatVectorAllOnes(
9931 ScalableOnes.getOperand(1).getNode())) {
9932 CC = Xor.getOperand(0);
9933 std::swap(SplatZero, SplatTrueVal);
9934 }
9935 }
9936 }
9937
9938 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9939 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9940 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9941 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
9942 SDValue Select =
9943 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
9944 SplatZero, DAG.getUNDEF(ContainerVT), VL);
9945
9946 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
9947}
9948
9949// Custom-lower truncations from vectors to mask vectors by using a mask and a
9950// setcc operation:
9951// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
9952SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
9953 SelectionDAG &DAG) const {
9954 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
9955 SDLoc DL(Op);
9956 EVT MaskVT = Op.getValueType();
9957 // Only expect to custom-lower truncations to mask types
9958 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
9959 "Unexpected type for vector mask lowering");
9960 SDValue Src = Op.getOperand(0);
9961 MVT VecVT = Src.getSimpleValueType();
9962 SDValue Mask, VL;
9963 if (IsVPTrunc) {
9964 Mask = Op.getOperand(1);
9965 VL = Op.getOperand(2);
9966 }
9967 // If this is a fixed vector, we need to convert it to a scalable vector.
9968 MVT ContainerVT = VecVT;
9969
9970 if (VecVT.isFixedLengthVector()) {
9971 ContainerVT = getContainerForFixedLengthVector(VecVT);
9972 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
9973 if (IsVPTrunc) {
9974 MVT MaskContainerVT =
9975 getContainerForFixedLengthVector(Mask.getSimpleValueType());
9976 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
9977 }
9978 }
9979
9980 if (!IsVPTrunc) {
9981 std::tie(Mask, VL) =
9982 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9983 }
9984
9985 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
9986 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
9987
9988 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9989 DAG.getUNDEF(ContainerVT), SplatOne, VL);
9990 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
9991 DAG.getUNDEF(ContainerVT), SplatZero, VL);
9992
9993 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
9994 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
9995 DAG.getUNDEF(ContainerVT), Mask, VL);
9996 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
9997 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
9998 DAG.getUNDEF(MaskContainerVT), Mask, VL});
9999 if (MaskVT.isFixedLengthVector())
10000 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
10001 return Trunc;
10002}
10003
10004SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
10005 SelectionDAG &DAG) const {
10006 unsigned Opc = Op.getOpcode();
10007 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
10008 SDLoc DL(Op);
10009
10010 MVT VT = Op.getSimpleValueType();
10011 // Only custom-lower vector truncates
10012 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10013
10014 // Truncates to mask types are handled differently
10015 if (VT.getVectorElementType() == MVT::i1)
10016 return lowerVectorMaskTruncLike(Op, DAG);
10017
10018 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
10019 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
10020 // truncate by one power of two at a time.
10021 MVT DstEltVT = VT.getVectorElementType();
10022
10023 SDValue Src = Op.getOperand(0);
10024 MVT SrcVT = Src.getSimpleValueType();
10025 MVT SrcEltVT = SrcVT.getVectorElementType();
10026
10027 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
10028 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
10029 "Unexpected vector truncate lowering");
10030
10031 MVT ContainerVT = SrcVT;
10032 SDValue Mask, VL;
10033 if (IsVPTrunc) {
10034 Mask = Op.getOperand(1);
10035 VL = Op.getOperand(2);
10036 }
10037 if (SrcVT.isFixedLengthVector()) {
10038 ContainerVT = getContainerForFixedLengthVector(SrcVT);
10039 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
10040 if (IsVPTrunc) {
10041 MVT MaskVT = getMaskTypeFor(ContainerVT);
10042 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10043 }
10044 }
10045
10046 SDValue Result = Src;
10047 if (!IsVPTrunc) {
10048 std::tie(Mask, VL) =
10049 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10050 }
10051
10052 unsigned NewOpc;
10053 if (Opc == ISD::TRUNCATE_SSAT_S)
10054 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
10055 else if (Opc == ISD::TRUNCATE_USAT_U)
10056 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
10057 else
10058 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
10059
10060 do {
10061 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
10062 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
10063 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
10064 } while (SrcEltVT != DstEltVT);
10065
10066 if (SrcVT.isFixedLengthVector())
10067 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10068
10069 return Result;
10070}
10071
10072SDValue
10073RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
10074 SelectionDAG &DAG) const {
10075 SDLoc DL(Op);
10076 SDValue Chain = Op.getOperand(0);
10077 SDValue Src = Op.getOperand(1);
10078 MVT VT = Op.getSimpleValueType();
10079 MVT SrcVT = Src.getSimpleValueType();
10080 MVT ContainerVT = VT;
10081 if (VT.isFixedLengthVector()) {
10082 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10083 ContainerVT =
10084 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10085 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10086 }
10087
10088 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10089
10090 // RVV can only widen/truncate fp to types double/half the size of the source.
10091 if ((VT.getVectorElementType() == MVT::f64 &&
10092 (SrcVT.getVectorElementType() == MVT::f16 ||
10093 SrcVT.getVectorElementType() == MVT::bf16)) ||
10094 ((VT.getVectorElementType() == MVT::f16 ||
10095 VT.getVectorElementType() == MVT::bf16) &&
10096 SrcVT.getVectorElementType() == MVT::f64)) {
10097 // For double rounding, the intermediate rounding should be round-to-odd.
10098 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10099 ? RISCVISD::STRICT_FP_EXTEND_VL
10100 : RISCVISD::STRICT_VFNCVT_ROD_VL;
10101 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10102 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
10103 Chain, Src, Mask, VL);
10104 Chain = Src.getValue(1);
10105 }
10106
10107 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10108 ? RISCVISD::STRICT_FP_EXTEND_VL
10109 : RISCVISD::STRICT_FP_ROUND_VL;
10110 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
10111 Chain, Src, Mask, VL);
10112 if (VT.isFixedLengthVector()) {
10113 // StrictFP operations have two result values; their lowered results should
10114 // have the same number of values.
10115 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10116 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10117 }
10118 return Res;
10119}
10120
10121SDValue
10122RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
10123 SelectionDAG &DAG) const {
10124 bool IsVP =
10125 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
10126 bool IsExtend =
10127 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
10128 // RVV can only truncate fp to types half the size of the source. We
10129 // custom-lower f64->f16 rounds via RVV's round-to-odd float
10130 // conversion instruction.
10131 SDLoc DL(Op);
10132 MVT VT = Op.getSimpleValueType();
10133
10134 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10135
10136 SDValue Src = Op.getOperand(0);
10137 MVT SrcVT = Src.getSimpleValueType();
10138
10139 bool IsDirectExtend =
10140 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
10141 (SrcVT.getVectorElementType() != MVT::f16 &&
10142 SrcVT.getVectorElementType() != MVT::bf16));
10143 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
10144 VT.getVectorElementType() != MVT::bf16) ||
10145 SrcVT.getVectorElementType() != MVT::f64);
10146
10147 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
10148
10149 // We have regular SD node patterns for direct non-VL extends.
10150 if (VT.isScalableVector() && IsDirectConv && !IsVP)
10151 return Op;
10152
10153 // Prepare any fixed-length vector operands.
10154 MVT ContainerVT = VT;
10155 SDValue Mask, VL;
10156 if (IsVP) {
10157 Mask = Op.getOperand(1);
10158 VL = Op.getOperand(2);
10159 }
10160 if (VT.isFixedLengthVector()) {
10161 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10162 ContainerVT =
10163 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10164 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10165 if (IsVP) {
10166 MVT MaskVT = getMaskTypeFor(ContainerVT);
10167 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10168 }
10169 }
10170
10171 if (!IsVP)
10172 std::tie(Mask, VL) =
10173 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10174
10175 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
10176
10177 if (IsDirectConv) {
10178 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
10179 if (VT.isFixedLengthVector())
10180 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
10181 return Src;
10182 }
10183
10184 unsigned InterConvOpc =
10185 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
10186
10187 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10188 SDValue IntermediateConv =
10189 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
10190 SDValue Result =
10191 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
10192 if (VT.isFixedLengthVector())
10193 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10194 return Result;
10195}
10196
10197// Given a scalable vector type and an index into it, returns the type for the
10198// smallest subvector that the index fits in. This can be used to reduce LMUL
10199// for operations like vslidedown.
10200//
10201// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10202static std::optional<MVT>
10203getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10204 const RISCVSubtarget &Subtarget) {
10205 assert(VecVT.isScalableVector());
10206 const unsigned EltSize = VecVT.getScalarSizeInBits();
10207 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10208 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10209 MVT SmallerVT;
10210 if (MaxIdx < MinVLMAX)
10211 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10212 else if (MaxIdx < MinVLMAX * 2)
10213 SmallerVT =
10214 RISCVTargetLowering::getM1VT(VecVT).getDoubleNumVectorElementsVT();
10215 else if (MaxIdx < MinVLMAX * 4)
10216 SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10217 .getDoubleNumVectorElementsVT()
10218 .getDoubleNumVectorElementsVT();
10219 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10220 return std::nullopt;
10221 return SmallerVT;
10222}
10223
10224 static bool isValidVisniInsertExtractIndex(SDValue Idx) {
10225 auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10226 if (!IdxC || isNullConstant(Idx))
10227 return false;
10228 return isUInt<5>(IdxC->getZExtValue());
10229}
10230
10231// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10232// first position of a vector, and that vector is slid up to the insert index.
10233// By limiting the active vector length to index+1 and merging with the
10234// original vector (with an undisturbed tail policy for elements >= VL), we
10235// achieve the desired result of leaving all elements untouched except the one
10236// at VL-1, which is replaced with the desired value.
10237SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10238 SelectionDAG &DAG) const {
10239 SDLoc DL(Op);
10240 MVT VecVT = Op.getSimpleValueType();
10241 MVT XLenVT = Subtarget.getXLenVT();
10242 SDValue Vec = Op.getOperand(0);
10243 SDValue Val = Op.getOperand(1);
10244 MVT ValVT = Val.getSimpleValueType();
10245 SDValue Idx = Op.getOperand(2);
10246
10247 if (VecVT.getVectorElementType() == MVT::i1) {
10248 // FIXME: For now we just promote to an i8 vector and insert into that,
10249 // but this is probably not optimal.
10250 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10251 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10252 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10253 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10254 }
10255
10256 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10257 ValVT == MVT::bf16) {
10258 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10259 MVT IntVT = VecVT.changeTypeToInteger();
10260 SDValue IntInsert = DAG.getNode(
10261 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10262 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10263 return DAG.getBitcast(VecVT, IntInsert);
10264 }
10265
10266 MVT ContainerVT = VecVT;
10267 // If the operand is a fixed-length vector, convert to a scalable one.
10268 if (VecVT.isFixedLengthVector()) {
10269 ContainerVT = getContainerForFixedLengthVector(VecVT);
10270 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10271 }
10272
10273 // If we know the index we're going to insert at, we can shrink Vec so that
10274 // we're performing the scalar inserts and slideup on a smaller LMUL.
10275 SDValue OrigVec = Vec;
10276 std::optional<unsigned> AlignedIdx;
10277 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10278 const unsigned OrigIdx = IdxC->getZExtValue();
10279 // Do we know an upper bound on LMUL?
10280 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10281 DL, DAG, Subtarget)) {
10282 ContainerVT = *ShrunkVT;
10283 AlignedIdx = 0;
10284 }
10285
10286 // If we're compiling for an exact VLEN value, we can always perform
10287 // the insert in m1 as we can determine the register corresponding to
10288 // the index in the register group.
10289 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10290 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10291 EVT ElemVT = VecVT.getVectorElementType();
10292 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10293 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10294 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10295 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10296 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10297 ContainerVT = M1VT;
10298 }
10299
10300 if (AlignedIdx)
10301 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
10302 }
10303
10304 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
10305 // Even i64-element vectors on RV32 can be lowered without scalar
10306 // legalization if the most-significant 32 bits of the value are not affected
10307 // by the sign-extension of the lower 32 bits.
10308 // TODO: We could also catch sign extensions of a 32-bit value.
10309 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
10310 const auto *CVal = cast<ConstantSDNode>(Val);
10311 if (isInt<32>(CVal->getSExtValue())) {
10312 IsLegalInsert = true;
10313 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
10314 }
10315 }
10316
10317 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10318
10319 SDValue ValInVec;
10320
10321 if (IsLegalInsert) {
10322 unsigned Opc =
10323 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
10324 if (isNullConstant(Idx)) {
10325 if (!VecVT.isFloatingPoint())
10326 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10327 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
10328
10329 if (AlignedIdx)
10330 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10331 if (!VecVT.isFixedLengthVector())
10332 return Vec;
10333 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10334 }
10335
10336 // Use ri.vinsert.v.x if available.
10337 if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
10338 isValidVisniInsertExtractIndex(Idx)) {
10339 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
10340 SDValue PolicyOp =
10341 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
10342 Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
10343 VL, PolicyOp);
10344 if (AlignedIdx)
10345 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10346 if (!VecVT.isFixedLengthVector())
10347 return Vec;
10348 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10349 }
10350
10351 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
10352 } else {
10353 // On RV32, i64-element vectors must be specially handled to place the
10354 // value at element 0, by using two vslide1down instructions in sequence on
10355 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10356 // this.
10357 SDValue ValLo, ValHi;
10358 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
10359 MVT I32ContainerVT =
10360 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
10361 SDValue I32Mask =
10362 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
10363 // Limit the active VL to two.
10364 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
10365 // If the Idx is 0 we can insert directly into the vector.
10366 if (isNullConstant(Idx)) {
10367 // First slide in the lo value, then the hi value above it. We use slide1down
10368 // to avoid the register group overlap constraint of vslide1up.
10369 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10370 Vec, Vec, ValLo, I32Mask, InsertI64VL);
10371 // If the source vector is undef don't pass along the tail elements from
10372 // the previous slide1down.
10373 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10374 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10375 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
10376 // Bitcast back to the right container type.
10377 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10378
10379 if (AlignedIdx)
10380 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
10381 if (!VecVT.isFixedLengthVector())
10382 return ValInVec;
10383 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
10384 }
10385
10386 // First slide in the lo value, then the hi value above it. We use slide1down
10387 // to avoid the register group overlap constraint of vslide1up.
10388 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10389 DAG.getUNDEF(I32ContainerVT),
10390 DAG.getUNDEF(I32ContainerVT), ValLo,
10391 I32Mask, InsertI64VL);
10392 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10393 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
10394 I32Mask, InsertI64VL);
10395 // Bitcast back to the right container type.
10396 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10397 }
10398
10399 // Now that the value is in a vector, slide it into position.
10400 SDValue InsertVL =
10401 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
10402
10403 // Use tail agnostic policy if Idx is the last index of Vec.
10404 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10405 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
10406 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10407 Policy = RISCVVType::TAIL_AGNOSTIC;
10408 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
10409 Idx, Mask, InsertVL, Policy);
10410
10411 if (AlignedIdx)
10412 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
10413 if (!VecVT.isFixedLengthVector())
10414 return Slideup;
10415 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
10416}
10417
10418// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10419// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10420// types this is done using VMV_X_S to allow us to glean information about the
10421// sign bits of the result.
10422SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10423 SelectionDAG &DAG) const {
10424 SDLoc DL(Op);
10425 SDValue Idx = Op.getOperand(1);
10426 SDValue Vec = Op.getOperand(0);
10427 EVT EltVT = Op.getValueType();
10428 MVT VecVT = Vec.getSimpleValueType();
10429 MVT XLenVT = Subtarget.getXLenVT();
10430
10431 if (VecVT.getVectorElementType() == MVT::i1) {
10432 // Use vfirst.m to extract the first bit.
10433 if (isNullConstant(Idx)) {
10434 MVT ContainerVT = VecVT;
10435 if (VecVT.isFixedLengthVector()) {
10436 ContainerVT = getContainerForFixedLengthVector(VecVT);
10437 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10438 }
10439 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10440 SDValue Vfirst =
10441 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
10442 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
10443 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10444 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10445 }
10446 if (VecVT.isFixedLengthVector()) {
10447 unsigned NumElts = VecVT.getVectorNumElements();
10448 if (NumElts >= 8) {
10449 MVT WideEltVT;
10450 unsigned WidenVecLen;
10451 SDValue ExtractElementIdx;
10452 SDValue ExtractBitIdx;
10453 unsigned MaxEEW = Subtarget.getELen();
10454 MVT LargestEltVT = MVT::getIntegerVT(
10455 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
10456 if (NumElts <= LargestEltVT.getSizeInBits()) {
10457 assert(isPowerOf2_32(NumElts) &&
10458 "the number of elements should be power of 2");
10459 WideEltVT = MVT::getIntegerVT(NumElts);
10460 WidenVecLen = 1;
10461 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
10462 ExtractBitIdx = Idx;
10463 } else {
10464 WideEltVT = LargestEltVT;
10465 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10466 // extract element index = index / element width
10467 ExtractElementIdx = DAG.getNode(
10468 ISD::SRL, DL, XLenVT, Idx,
10469 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
10470 // mask bit index = index % element width
10471 ExtractBitIdx = DAG.getNode(
10472 ISD::AND, DL, XLenVT, Idx,
10473 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
10474 }
10475 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
10476 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
10477 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
10478 Vec, ExtractElementIdx);
10479 // Extract the bit from GPR.
10480 SDValue ShiftRight =
10481 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
10482 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
10483 DAG.getConstant(1, DL, XLenVT));
10484 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10485 }
10486 }
10487 // Otherwise, promote to an i8 vector and extract from that.
10488 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10489 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10490 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
10491 }
10492
10493 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10494 EltVT == MVT::bf16) {
10495 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
10496 MVT IntVT = VecVT.changeTypeToInteger();
10497 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
10498 SDValue IntExtract =
10499 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
10500 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
10501 }
10502
10503 // If this is a fixed vector, we need to convert it to a scalable vector.
10504 MVT ContainerVT = VecVT;
10505 if (VecVT.isFixedLengthVector()) {
10506 ContainerVT = getContainerForFixedLengthVector(VecVT);
10507 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10508 }
10509
10510 // If we're compiling for an exact VLEN value and we have a known
10511 // constant index, we can always perform the extract in m1 (or
10512 // smaller) as we can determine the register corresponding to
10513 // the index in the register group.
10514 const auto VLen = Subtarget.getRealVLen();
10515 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10516 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10517 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10518 unsigned OrigIdx = IdxC->getZExtValue();
10519 EVT ElemVT = VecVT.getVectorElementType();
10520 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10521 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10522 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10523 unsigned ExtractIdx =
10524 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10525 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
10526 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10527 ContainerVT = M1VT;
10528 }
10529
10530 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10531 // contains our index.
10532 std::optional<uint64_t> MaxIdx;
10533 if (VecVT.isFixedLengthVector())
10534 MaxIdx = VecVT.getVectorNumElements() - 1;
10535 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
10536 MaxIdx = IdxC->getZExtValue();
10537 if (MaxIdx) {
10538 if (auto SmallerVT =
10539 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
10540 ContainerVT = *SmallerVT;
10541 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
10542 }
10543 }
10544
10545 // Use ri.vextract.x.v if available.
10546 // TODO: Avoid index 0 and just use the vmv.x.s
10547 if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10548 isValidVisniInsertExtractIndex(Idx)) {
10549 SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
10550 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
10551 }
10552
10553 // If after narrowing, the required slide is still greater than LMUL2,
10554 // fallback to generic expansion and go through the stack. This is done
10555 // for a subtle reason: extracting *all* elements out of a vector is
10556 // widely expected to be linear in vector size, but because vslidedown
10557 // is linear in LMUL, performing N extracts using vslidedown becomes
10558 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10559 // seems to have the same problem (the store is linear in LMUL), but the
10560 // generic expansion *memoizes* the store, and thus for many extracts of
10561 // the same vector we end up with one store and a bunch of loads.
10562 // TODO: We don't have the same code for insert_vector_elt because we
10563 // have BUILD_VECTOR and handle the degenerate case there. Should we
10564 // consider adding an inverse BUILD_VECTOR node?
10565 MVT LMUL2VT =
10566 RISCVTargetLowering::getM1VT(ContainerVT).getDoubleNumVectorElementsVT();
10567 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
10568 return SDValue();
10569
10570 // If the index is 0, the vector is already in the right position.
10571 if (!isNullConstant(Idx)) {
10572 // Use a VL of 1 to avoid processing more elements than we need.
10573 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10574 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10575 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10576 }
10577
10578 if (!EltVT.isInteger()) {
10579 // Floating-point extracts are handled in TableGen.
10580 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
10581 }
10582
10583 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10584 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
10585}
10586
10587// Some RVV intrinsics may claim that they want an integer operand to be
10588// promoted or expanded.
10589 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
10590 const RISCVSubtarget &Subtarget) {
10591 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10592 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10593 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10594 "Unexpected opcode");
10595
10596 if (!Subtarget.hasVInstructions())
10597 return SDValue();
10598
10599 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10600 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10601 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10602
10603 SDLoc DL(Op);
10604
10605 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10606 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10607 if (!II || !II->hasScalarOperand())
10608 return SDValue();
10609
10610 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
10611 assert(SplatOp < Op.getNumOperands());
10612
10613 SmallVector<SDValue, 8> Operands(Op->ops());
10614 SDValue &ScalarOp = Operands[SplatOp];
10615 MVT OpVT = ScalarOp.getSimpleValueType();
10616 MVT XLenVT = Subtarget.getXLenVT();
10617
10618 // If this isn't a scalar, or its type is XLenVT we're done.
10619 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10620 return SDValue();
10621
10622 // Simplest case is that the operand needs to be promoted to XLenVT.
10623 if (OpVT.bitsLT(XLenVT)) {
10624 // If the operand is a constant, sign extend to increase our chances
10625 // of being able to use a .vi instruction. ANY_EXTEND would become a
10626 // zero extend and the simm5 check in isel would fail.
10627 // FIXME: Should we ignore the upper bits in isel instead?
10628 unsigned ExtOpc =
10629 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10630 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10631 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10632 }
10633
10634 // Use the previous operand to get the vXi64 VT. The result might be a mask
10635 // VT for compares. Using the previous operand assumes that the previous
10636 // operand will never have a smaller element size than a scalar operand and
10637 // that a widening operation never uses SEW=64.
10638 // NOTE: If this fails the below assert, we can probably just find the
10639 // element count from any operand or result and use it to construct the VT.
10640 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
10641 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
10642
10643 // The more complex case is when the scalar is larger than XLenVT.
10644 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
10645 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
10646
10647 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
10648 // instruction to sign-extend since SEW>XLEN.
10649 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
10650 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
10651 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10652 }
10653
10654 switch (IntNo) {
10655 case Intrinsic::riscv_vslide1up:
10656 case Intrinsic::riscv_vslide1down:
10657 case Intrinsic::riscv_vslide1up_mask:
10658 case Intrinsic::riscv_vslide1down_mask: {
10659 // We need to special case these when the scalar is larger than XLen.
10660 unsigned NumOps = Op.getNumOperands();
10661 bool IsMasked = NumOps == 7;
10662
10663 // Convert the vector source to the equivalent nxvXi32 vector.
10664 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
10665 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
10666 SDValue ScalarLo, ScalarHi;
10667 std::tie(ScalarLo, ScalarHi) =
10668 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
10669
10670 // Double the VL since we halved SEW.
10671 SDValue AVL = getVLOperand(Op);
10672 SDValue I32VL;
10673
10674 // Optimize for constant AVL
10675 if (isa<ConstantSDNode>(AVL)) {
10676 const auto [MinVLMAX, MaxVLMAX] =
10677 RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
10678
10679 uint64_t AVLInt = AVL->getAsZExtVal();
10680 if (AVLInt <= MinVLMAX) {
10681 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
10682 } else if (AVLInt >= 2 * MaxVLMAX) {
10683 // Just set vl to VLMAX in this situation
10684 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
10685 } else {
10686 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
10687 // depends on the hardware implementation, so let the vsetvli-based code
10688 // below compute it.
10689 }
10690 }
10691 if (!I32VL) {
10692 RISCVVType::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
10693 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
10694 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
10695 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
10696 SDValue SETVL =
10697 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
10698 // Use a vsetvli instruction to get the actual VL, which depends on the
10699 // hardware implementation.
10700 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
10701 SEW, LMUL);
10702 I32VL =
10703 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
10704 }
10705
10706 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
10707
10708 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
10709 // instructions.
10710 SDValue Passthru;
10711 if (IsMasked)
10712 Passthru = DAG.getUNDEF(I32VT);
10713 else
10714 Passthru = DAG.getBitcast(I32VT, Operands[1]);
10715
10716 if (IntNo == Intrinsic::riscv_vslide1up ||
10717 IntNo == Intrinsic::riscv_vslide1up_mask) {
10718 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10719 ScalarHi, I32Mask, I32VL);
10720 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
10721 ScalarLo, I32Mask, I32VL);
10722 } else {
10723 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10724 ScalarLo, I32Mask, I32VL);
10725 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
10726 ScalarHi, I32Mask, I32VL);
10727 }
10728
10729 // Convert back to nxvXi64.
10730 Vec = DAG.getBitcast(VT, Vec);
10731
10732 if (!IsMasked)
10733 return Vec;
10734 // Apply mask after the operation.
10735 SDValue Mask = Operands[NumOps - 3];
10736 SDValue MaskedOff = Operands[1];
10737 // Assume Policy operand is the last operand.
10738 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
10739 // We don't need to select maskedoff if it's undef.
10740 if (MaskedOff.isUndef())
10741 return Vec;
10742 // TAMU
10743 if (Policy == RISCVVType::TAIL_AGNOSTIC)
10744 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10745 DAG.getUNDEF(VT), AVL);
10746 // TUMA or TUMU: currently we always emit the tumu policy regardless of tuma.
10747 // This is fine because vmerge does not care about the mask policy.
10748 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
10749 MaskedOff, AVL);
10750 }
10751 }
10752
10753 // We need to convert the scalar to a splat vector.
10754 SDValue VL = getVLOperand(Op);
10755 assert(VL.getValueType() == XLenVT);
10756 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
10757 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10758}
10759
10760// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
10761// scalable vector llvm.get.vector.length for now.
10762//
10763// We need to convert from a scalable VF to a vsetvli with VLMax equal to
10764// (vscale * VF). The vscale and VF are independent of element width. We use
10765// SEW=8 for the vsetvli because it is the only element width that supports all
10766// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
10767// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
10768// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
10769// SEW and LMUL are better for the surrounding vector instructions.
10770 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
10771 const RISCVSubtarget &Subtarget) {
10772 MVT XLenVT = Subtarget.getXLenVT();
10773
10774 // The smallest LMUL is only valid for the smallest element width.
10775 const unsigned ElementWidth = 8;
10776
10777 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
10778 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
10779 // We don't support VF==1 with ELEN==32.
10780 [[maybe_unused]] unsigned MinVF =
10781 RISCV::RVVBitsPerBlock / Subtarget.getELen();
10782
10783 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
10784 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
10785 "Unexpected VF");
10786
10787 bool Fractional = VF < LMul1VF;
10788 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
10789 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
10790 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
10791
10792 SDLoc DL(N);
10793
10794 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
10795 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
10796
10797 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
10798
10799 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
10800 SDValue Res =
10801 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
10802 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
10803}
10804
10805 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
10806 const RISCVSubtarget &Subtarget) {
10807 SDValue Op0 = N->getOperand(1);
10808 MVT OpVT = Op0.getSimpleValueType();
10809 MVT ContainerVT = OpVT;
10810 if (OpVT.isFixedLengthVector()) {
10811 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
10812 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
10813 }
10814 MVT XLenVT = Subtarget.getXLenVT();
10815 SDLoc DL(N);
10816 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
10817 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
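// vfirst returns -1 when no element of the mask is set. Operand 2 of the
// node is the zero_is_poison flag: if it is 1, the no-set-bit case is poison
// and the raw vfirst result can be returned directly.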
10818 if (isOneConstant(N->getOperand(2)))
10819 return Res;
10820
10821 // Convert -1 to VL.
10822 SDValue Setcc =
10823 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
10824 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
10825 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
10826}
10827
10828static inline void promoteVCIXScalar(SDValue Op,
10829 MutableArrayRef<SDValue> Operands,
10830 SelectionDAG &DAG) {
10831 const RISCVSubtarget &Subtarget =
10832 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10833
10834 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10835 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10836 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10837 SDLoc DL(Op);
10838
10839 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
10840 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10841 if (!II || !II->hasScalarOperand())
10842 return;
10843
10844 unsigned SplatOp = II->ScalarOperand + 1;
10845 assert(SplatOp < Op.getNumOperands());
10846
10847 SDValue &ScalarOp = Operands[SplatOp];
10848 MVT OpVT = ScalarOp.getSimpleValueType();
10849 MVT XLenVT = Subtarget.getXLenVT();
10850
10851 // The code below is partially copied from lowerVectorIntrinsicScalars.
10852 // If this isn't a scalar, or its type is XLenVT we're done.
10853 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10854 return;
10855
10856 // Manually emit promote operation for scalar operation.
10857 if (OpVT.bitsLT(XLenVT)) {
10858 unsigned ExtOpc =
10859 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10860 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10861 }
10862}
10863
10864static void processVCIXOperands(SDValue OrigOp,
10865 MutableArrayRef<SDValue> Operands,
10866 SelectionDAG &DAG) {
10867 promoteVCIXScalar(OrigOp, Operands, DAG);
10868 const RISCVSubtarget &Subtarget =
10869 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
10870 for (SDValue &V : Operands) {
10871 EVT ValType = V.getValueType();
10872 if (ValType.isVector() && ValType.isFloatingPoint()) {
10873 MVT InterimIVT =
10874 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
10875 ValType.getVectorElementCount());
10876 V = DAG.getBitcast(InterimIVT, V);
10877 }
10878 if (ValType.isFixedLengthVector()) {
10879 MVT OpContainerVT = getContainerForFixedLengthVector(
10880 DAG, V.getSimpleValueType(), Subtarget);
10881 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
10882 }
10883 }
10884}
10885
10886// LMUL * VLEN should be greater than or equal to EGS * SEW
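// (For example, for nxv4i32 with a minimum VLEN of 128: LMUL * VLEN =
// 128 * 128 / 64 = 256 bits, which covers an EGS=4, SEW=32 element group of
// 128 bits.)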
10887static inline bool isValidEGW(int EGS, EVT VT,
10888 const RISCVSubtarget &Subtarget) {
10889 return (Subtarget.getRealMinVLen() *
10890 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
10891 EGS * VT.getScalarSizeInBits();
10892}
10893
10894SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10895 SelectionDAG &DAG) const {
10896 unsigned IntNo = Op.getConstantOperandVal(0);
10897 SDLoc DL(Op);
10898 MVT XLenVT = Subtarget.getXLenVT();
10899
10900 switch (IntNo) {
10901 default:
10902 break; // Don't custom lower most intrinsics.
10903 case Intrinsic::riscv_tuple_insert: {
10904 SDValue Vec = Op.getOperand(1);
10905 SDValue SubVec = Op.getOperand(2);
10906 SDValue Index = Op.getOperand(3);
10907
10908 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
10909 SubVec, Index);
10910 }
10911 case Intrinsic::riscv_tuple_extract: {
10912 SDValue Vec = Op.getOperand(1);
10913 SDValue Index = Op.getOperand(2);
10914
10915 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
10916 Index);
10917 }
10918 case Intrinsic::thread_pointer: {
10919 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10920 return DAG.getRegister(RISCV::X4, PtrVT);
10921 }
10922 case Intrinsic::riscv_orc_b:
10923 case Intrinsic::riscv_brev8:
10924 case Intrinsic::riscv_sha256sig0:
10925 case Intrinsic::riscv_sha256sig1:
10926 case Intrinsic::riscv_sha256sum0:
10927 case Intrinsic::riscv_sha256sum1:
10928 case Intrinsic::riscv_sm3p0:
10929 case Intrinsic::riscv_sm3p1: {
10930 unsigned Opc;
10931 switch (IntNo) {
10932 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
10933 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
10934 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
10935 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
10936 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
10937 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
10938 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
10939 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
10940 }
10941
10942 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10943 }
10944 case Intrinsic::riscv_sm4ks:
10945 case Intrinsic::riscv_sm4ed: {
10946 unsigned Opc =
10947 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
10948
10949 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
10950 Op.getOperand(3));
10951 }
10952 case Intrinsic::riscv_zip:
10953 case Intrinsic::riscv_unzip: {
10954 unsigned Opc =
10955 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
10956 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
10957 }
10958 case Intrinsic::riscv_mopr:
10959 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
10960 Op.getOperand(2));
10961
10962 case Intrinsic::riscv_moprr: {
10963 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
10964 Op.getOperand(2), Op.getOperand(3));
10965 }
10966 case Intrinsic::riscv_clmul:
10967 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
10968 Op.getOperand(2));
10969 case Intrinsic::riscv_clmulh:
10970 case Intrinsic::riscv_clmulr: {
10971 unsigned Opc =
10972 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
10973 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
10974 }
10975 case Intrinsic::experimental_get_vector_length:
10976 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
10977 case Intrinsic::experimental_cttz_elts:
10978 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
10979 case Intrinsic::riscv_vmv_x_s: {
10980 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
10981 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
10982 }
10983 case Intrinsic::riscv_vfmv_f_s:
10984 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
10985 case Intrinsic::riscv_vmv_v_x:
10986 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
10987 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
10988 Subtarget);
10989 case Intrinsic::riscv_vfmv_v_f:
10990 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
10991 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
10992 case Intrinsic::riscv_vmv_s_x: {
10993 SDValue Scalar = Op.getOperand(2);
10994
10995 if (Scalar.getValueType().bitsLE(XLenVT)) {
10996 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
10997 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
10998 Op.getOperand(1), Scalar, Op.getOperand(3));
10999 }
11000
11001 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
11002
11003 // This is an i64 value that lives in two scalar registers. We have to
11004 // insert this in a convoluted way. First we build vXi64 splat containing
11005 // the two values that we assemble using some bit math. Next we'll use
11006 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
11007 // to merge element 0 from our splat into the source vector.
11008 // FIXME: This is probably not the best way to do this, but it is
11009 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
11010 // point.
11011 // sw lo, (a0)
11012 // sw hi, 4(a0)
11013 // vlse vX, (a0)
11014 //
11015 // vid.v vVid
11016 // vmseq.vx mMask, vVid, 0
11017 // vmerge.vvm vDest, vSrc, vVal, mMask
11018 MVT VT = Op.getSimpleValueType();
11019 SDValue Vec = Op.getOperand(1);
11020 SDValue VL = getVLOperand(Op);
11021
11022 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
11023 if (Op.getOperand(1).isUndef())
11024 return SplattedVal;
11025 SDValue SplattedIdx =
11026 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11027 DAG.getConstant(0, DL, MVT::i32), VL);
11028
11029 MVT MaskVT = getMaskTypeFor(VT);
11030 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
11031 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11032 SDValue SelectCond =
11033 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11034 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
11035 DAG.getUNDEF(MaskVT), Mask, VL});
11036 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
11037 Vec, DAG.getUNDEF(VT), VL);
11038 }
11039 case Intrinsic::riscv_vfmv_s_f:
11040 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
11041 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
11042 // EGS * EEW >= 128 bits
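// (These Zvkned/Zvksed element-group instructions operate on EGS=4 groups of
// EEW=32 elements, i.e. 128-bit element groups.)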
11043 case Intrinsic::riscv_vaesdf_vv:
11044 case Intrinsic::riscv_vaesdf_vs:
11045 case Intrinsic::riscv_vaesdm_vv:
11046 case Intrinsic::riscv_vaesdm_vs:
11047 case Intrinsic::riscv_vaesef_vv:
11048 case Intrinsic::riscv_vaesef_vs:
11049 case Intrinsic::riscv_vaesem_vv:
11050 case Intrinsic::riscv_vaesem_vs:
11051 case Intrinsic::riscv_vaeskf1:
11052 case Intrinsic::riscv_vaeskf2:
11053 case Intrinsic::riscv_vaesz_vs:
11054 case Intrinsic::riscv_vsm4k:
11055 case Intrinsic::riscv_vsm4r_vv:
11056 case Intrinsic::riscv_vsm4r_vs: {
11057 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
11058 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
11059 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
11060 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
11061 return Op;
11062 }
11063 // EGS * EEW >= 256 bits
11064 case Intrinsic::riscv_vsm3c:
11065 case Intrinsic::riscv_vsm3me: {
11066 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
11067 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
11068 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
11069 return Op;
11070 }
11071 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
11072 case Intrinsic::riscv_vsha2ch:
11073 case Intrinsic::riscv_vsha2cl:
11074 case Intrinsic::riscv_vsha2ms: {
11075 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
11076 !Subtarget.hasStdExtZvknhb())
11077 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
11078 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
11079 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
11080 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
11081 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
11082 return Op;
11083 }
11084 case Intrinsic::riscv_sf_vc_v_x:
11085 case Intrinsic::riscv_sf_vc_v_i:
11086 case Intrinsic::riscv_sf_vc_v_xv:
11087 case Intrinsic::riscv_sf_vc_v_iv:
11088 case Intrinsic::riscv_sf_vc_v_vv:
11089 case Intrinsic::riscv_sf_vc_v_fv:
11090 case Intrinsic::riscv_sf_vc_v_xvv:
11091 case Intrinsic::riscv_sf_vc_v_ivv:
11092 case Intrinsic::riscv_sf_vc_v_vvv:
11093 case Intrinsic::riscv_sf_vc_v_fvv:
11094 case Intrinsic::riscv_sf_vc_v_xvw:
11095 case Intrinsic::riscv_sf_vc_v_ivw:
11096 case Intrinsic::riscv_sf_vc_v_vvw:
11097 case Intrinsic::riscv_sf_vc_v_fvw: {
11098 MVT VT = Op.getSimpleValueType();
11099
11100 SmallVector<SDValue> Operands{Op->op_values()};
11101 processVCIXOperands(Op, Operands, DAG);
11102
11103 MVT RetVT = VT;
11104 if (VT.isFixedLengthVector())
11105 RetVT = getContainerForFixedLengthVector(VT);
11106 else if (VT.isFloatingPoint())
11107 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11108 VT.getVectorElementCount());
11109
11110 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
11111
11112 if (VT.isFixedLengthVector())
11113 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
11114 else if (VT.isFloatingPoint())
11115 NewNode = DAG.getBitcast(VT, NewNode);
11116
11117 if (Op == NewNode)
11118 break;
11119
11120 return NewNode;
11121 }
11122 }
11123
11124 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11125}
11126
11127static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
11128 unsigned Type) {
11129 SDLoc DL(Op);
11130 SmallVector<SDValue> Operands{Op->op_values()};
11131 Operands.erase(Operands.begin() + 1);
11132
11133 const RISCVSubtarget &Subtarget =
11134 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
11135 MVT VT = Op.getSimpleValueType();
11136 MVT RetVT = VT;
11137 MVT FloatVT = VT;
11138
11139 if (VT.isFloatingPoint()) {
11140 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11141 VT.getVectorElementCount());
11142 FloatVT = RetVT;
11143 }
11144 if (VT.isFixedLengthVector())
11145 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
11146 Subtarget);
11147
11148 processVCIXOperands(Op, Operands, DAG);
11149
11150 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11151 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11152 SDValue Chain = NewNode.getValue(1);
11153
11154 if (VT.isFixedLengthVector())
11155 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11156 if (VT.isFloatingPoint())
11157 NewNode = DAG.getBitcast(VT, NewNode);
11158
11159 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11160
11161 return NewNode;
11162}
11163
11164static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
11165 unsigned Type) {
11166 SmallVector<SDValue> Operands{Op->op_values()};
11167 Operands.erase(Operands.begin() + 1);
11168 processVCIXOperands(Op, Operands, DAG);
11169
11170 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11171}
11172
11173static SDValue
11174lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op,
11175 const RISCVSubtarget &Subtarget,
11176 SelectionDAG &DAG) {
11177 bool IsStrided;
11178 switch (IntNo) {
11179 case Intrinsic::riscv_seg2_load_mask:
11180 case Intrinsic::riscv_seg3_load_mask:
11181 case Intrinsic::riscv_seg4_load_mask:
11182 case Intrinsic::riscv_seg5_load_mask:
11183 case Intrinsic::riscv_seg6_load_mask:
11184 case Intrinsic::riscv_seg7_load_mask:
11185 case Intrinsic::riscv_seg8_load_mask:
11186 IsStrided = false;
11187 break;
11188 case Intrinsic::riscv_sseg2_load_mask:
11189 case Intrinsic::riscv_sseg3_load_mask:
11190 case Intrinsic::riscv_sseg4_load_mask:
11191 case Intrinsic::riscv_sseg5_load_mask:
11192 case Intrinsic::riscv_sseg6_load_mask:
11193 case Intrinsic::riscv_sseg7_load_mask:
11194 case Intrinsic::riscv_sseg8_load_mask:
11195 IsStrided = true;
11196 break;
11197 default:
11198 llvm_unreachable("unexpected intrinsic ID");
11199 };
11200
11201 static const Intrinsic::ID VlsegInts[7] = {
11202 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11203 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11204 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11205 Intrinsic::riscv_vlseg8_mask};
11206 static const Intrinsic::ID VlssegInts[7] = {
11207 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11208 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11209 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11210 Intrinsic::riscv_vlsseg8_mask};
11211
11212 SDLoc DL(Op);
11213 unsigned NF = Op->getNumValues() - 1;
11214 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11215 MVT XLenVT = Subtarget.getXLenVT();
11216 MVT VT = Op->getSimpleValueType(0);
11217 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11218 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11219 ContainerVT.getScalarSizeInBits();
11220 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
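// For example, NF = 4 segments whose container is nxv2i32 give
// Sz = 4 * 2 * 32 = 256 known-minimum bits, i.e. one LMUL=1 register per
// segment in the tuple type.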
11221
11222 // Operands: (chain, int_id, pointer, mask, vl) or
11223 // (chain, int_id, pointer, stride, mask, vl)
11224 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11225 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11226 MVT MaskVT = Mask.getSimpleValueType();
11227 MVT MaskContainerVT =
11228 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11229 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11230
11231 SDValue IntID = DAG.getTargetConstant(
11232 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11233 auto *Load = cast<MemIntrinsicSDNode>(Op);
11234
11235 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11236 SmallVector<SDValue, 9> Ops = {
11237 Load->getChain(),
11238 IntID,
11239 DAG.getUNDEF(VecTupTy),
11240 Op.getOperand(2),
11241 Mask,
11242 VL,
11243 DAG.getTargetConstant(
11244 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
11245 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11246 // Insert the stride operand.
11247 if (IsStrided)
11248 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11249
11250 SDValue Result =
11251 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11252 Load->getMemoryVT(), Load->getMemOperand());
11253 SmallVector<SDValue, 9> Results;
11254 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11255 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11256 Result.getValue(0),
11257 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11258 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11259 }
11260 Results.push_back(Result.getValue(1));
11261 return DAG.getMergeValues(Results, DL);
11262}
11263
11264SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11265 SelectionDAG &DAG) const {
11266 unsigned IntNo = Op.getConstantOperandVal(1);
11267 switch (IntNo) {
11268 default:
11269 break;
11270 case Intrinsic::riscv_seg2_load_mask:
11271 case Intrinsic::riscv_seg3_load_mask:
11272 case Intrinsic::riscv_seg4_load_mask:
11273 case Intrinsic::riscv_seg5_load_mask:
11274 case Intrinsic::riscv_seg6_load_mask:
11275 case Intrinsic::riscv_seg7_load_mask:
11276 case Intrinsic::riscv_seg8_load_mask:
11277 case Intrinsic::riscv_sseg2_load_mask:
11278 case Intrinsic::riscv_sseg3_load_mask:
11279 case Intrinsic::riscv_sseg4_load_mask:
11280 case Intrinsic::riscv_sseg5_load_mask:
11281 case Intrinsic::riscv_sseg6_load_mask:
11282 case Intrinsic::riscv_sseg7_load_mask:
11283 case Intrinsic::riscv_sseg8_load_mask:
11284 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
11285
11286 case Intrinsic::riscv_sf_vc_v_x_se:
11287 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
11288 case Intrinsic::riscv_sf_vc_v_i_se:
11289 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
11290 case Intrinsic::riscv_sf_vc_v_xv_se:
11291 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
11292 case Intrinsic::riscv_sf_vc_v_iv_se:
11293 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
11294 case Intrinsic::riscv_sf_vc_v_vv_se:
11295 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
11296 case Intrinsic::riscv_sf_vc_v_fv_se:
11297 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
11298 case Intrinsic::riscv_sf_vc_v_xvv_se:
11299 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
11300 case Intrinsic::riscv_sf_vc_v_ivv_se:
11301 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
11302 case Intrinsic::riscv_sf_vc_v_vvv_se:
11303 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
11304 case Intrinsic::riscv_sf_vc_v_fvv_se:
11305 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
11306 case Intrinsic::riscv_sf_vc_v_xvw_se:
11307 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
11308 case Intrinsic::riscv_sf_vc_v_ivw_se:
11309 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
11310 case Intrinsic::riscv_sf_vc_v_vvw_se:
11311 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
11312 case Intrinsic::riscv_sf_vc_v_fvw_se:
11313 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
11314 }
11315
11316 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11317}
11318
11319static SDValue
11320lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
11321 const RISCVSubtarget &Subtarget,
11322 SelectionDAG &DAG) {
11323 bool IsStrided;
11324 switch (IntNo) {
11325 case Intrinsic::riscv_seg2_store_mask:
11326 case Intrinsic::riscv_seg3_store_mask:
11327 case Intrinsic::riscv_seg4_store_mask:
11328 case Intrinsic::riscv_seg5_store_mask:
11329 case Intrinsic::riscv_seg6_store_mask:
11330 case Intrinsic::riscv_seg7_store_mask:
11331 case Intrinsic::riscv_seg8_store_mask:
11332 IsStrided = false;
11333 break;
11334 case Intrinsic::riscv_sseg2_store_mask:
11335 case Intrinsic::riscv_sseg3_store_mask:
11336 case Intrinsic::riscv_sseg4_store_mask:
11337 case Intrinsic::riscv_sseg5_store_mask:
11338 case Intrinsic::riscv_sseg6_store_mask:
11339 case Intrinsic::riscv_sseg7_store_mask:
11340 case Intrinsic::riscv_sseg8_store_mask:
11341 IsStrided = true;
11342 break;
11343 default:
11344 llvm_unreachable("unexpected intrinsic ID");
11345 }
11346
11347 SDLoc DL(Op);
11348 static const Intrinsic::ID VssegInts[] = {
11349 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
11350 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
11351 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
11352 Intrinsic::riscv_vsseg8_mask};
11353 static const Intrinsic::ID VsssegInts[] = {
11354 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
11355 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
11356 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
11357 Intrinsic::riscv_vssseg8_mask};
11358
11359 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
11360 // (chain, int_id, vec*, ptr, stride, mask, vl)
11361 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
11362 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11363 MVT XLenVT = Subtarget.getXLenVT();
11364 MVT VT = Op->getOperand(2).getSimpleValueType();
11365 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11366 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11367 ContainerVT.getScalarSizeInBits();
11368 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11369
11370 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11371 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11372 MVT MaskVT = Mask.getSimpleValueType();
11373 MVT MaskContainerVT =
11374 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11375 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11376
11377 SDValue IntID = DAG.getTargetConstant(
11378 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
11379 SDValue Ptr = Op->getOperand(NF + 2);
11380
11381 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
11382
11383 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11384 for (unsigned i = 0; i < NF; i++)
11385 StoredVal = DAG.getNode(
11386 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11387 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
11388 DAG, Subtarget),
11389 DAG.getTargetConstant(i, DL, MVT::i32));
11390
11391 SmallVector<SDValue, 8> Ops = {
11392 FixedIntrinsic->getChain(),
11393 IntID,
11394 StoredVal,
11395 Ptr,
11396 Mask,
11397 VL,
11398 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11399 // Insert the stride operand.
11400 if (IsStrided)
11401 Ops.insert(std::next(Ops.begin(), 4),
11402 Op.getOperand(Op.getNumOperands() - 3));
11403
11404 return DAG.getMemIntrinsicNode(
11405 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11406 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
11407}
11408
11409SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11410 SelectionDAG &DAG) const {
11411 unsigned IntNo = Op.getConstantOperandVal(1);
11412 switch (IntNo) {
11413 default:
11414 break;
11415 case Intrinsic::riscv_seg2_store_mask:
11416 case Intrinsic::riscv_seg3_store_mask:
11417 case Intrinsic::riscv_seg4_store_mask:
11418 case Intrinsic::riscv_seg5_store_mask:
11419 case Intrinsic::riscv_seg6_store_mask:
11420 case Intrinsic::riscv_seg7_store_mask:
11421 case Intrinsic::riscv_seg8_store_mask:
11422 case Intrinsic::riscv_sseg2_store_mask:
11423 case Intrinsic::riscv_sseg3_store_mask:
11424 case Intrinsic::riscv_sseg4_store_mask:
11425 case Intrinsic::riscv_sseg5_store_mask:
11426 case Intrinsic::riscv_sseg6_store_mask:
11427 case Intrinsic::riscv_sseg7_store_mask:
11428 case Intrinsic::riscv_sseg8_store_mask:
11429 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
11430
11431 case Intrinsic::riscv_sf_vc_xv_se:
11432 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
11433 case Intrinsic::riscv_sf_vc_iv_se:
11434 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
11435 case Intrinsic::riscv_sf_vc_vv_se:
11436 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
11437 case Intrinsic::riscv_sf_vc_fv_se:
11438 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
11439 case Intrinsic::riscv_sf_vc_xvv_se:
11440 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
11441 case Intrinsic::riscv_sf_vc_ivv_se:
11442 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
11443 case Intrinsic::riscv_sf_vc_vvv_se:
11444 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
11445 case Intrinsic::riscv_sf_vc_fvv_se:
11446 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
11447 case Intrinsic::riscv_sf_vc_xvw_se:
11448 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
11449 case Intrinsic::riscv_sf_vc_ivw_se:
11450 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
11451 case Intrinsic::riscv_sf_vc_vvw_se:
11452 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
11453 case Intrinsic::riscv_sf_vc_fvw_se:
11454 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
11455 }
11456
11457 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11458}
11459
11460static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11461 switch (ISDOpcode) {
11462 default:
11463 llvm_unreachable("Unhandled reduction");
11464 case ISD::VP_REDUCE_ADD:
11465 case ISD::VECREDUCE_ADD:
11466 return RISCVISD::VECREDUCE_ADD_VL;
11467 case ISD::VP_REDUCE_UMAX:
11468 case ISD::VECREDUCE_UMAX:
11469 return RISCVISD::VECREDUCE_UMAX_VL;
11470 case ISD::VP_REDUCE_SMAX:
11471 case ISD::VECREDUCE_SMAX:
11472 return RISCVISD::VECREDUCE_SMAX_VL;
11473 case ISD::VP_REDUCE_UMIN:
11474 case ISD::VECREDUCE_UMIN:
11475 return RISCVISD::VECREDUCE_UMIN_VL;
11476 case ISD::VP_REDUCE_SMIN:
11477 case ISD::VECREDUCE_SMIN:
11478 return RISCVISD::VECREDUCE_SMIN_VL;
11479 case ISD::VP_REDUCE_AND:
11480 case ISD::VECREDUCE_AND:
11481 return RISCVISD::VECREDUCE_AND_VL;
11482 case ISD::VP_REDUCE_OR:
11483 case ISD::VECREDUCE_OR:
11484 return RISCVISD::VECREDUCE_OR_VL;
11485 case ISD::VP_REDUCE_XOR:
11486 case ISD::VECREDUCE_XOR:
11487 return RISCVISD::VECREDUCE_XOR_VL;
11488 case ISD::VP_REDUCE_FADD:
11489 return RISCVISD::VECREDUCE_FADD_VL;
11490 case ISD::VP_REDUCE_SEQ_FADD:
11491 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11492 case ISD::VP_REDUCE_FMAX:
11493 case ISD::VP_REDUCE_FMAXIMUM:
11494 return RISCVISD::VECREDUCE_FMAX_VL;
11495 case ISD::VP_REDUCE_FMIN:
11496 case ISD::VP_REDUCE_FMINIMUM:
11497 return RISCVISD::VECREDUCE_FMIN_VL;
11498 }
11499
11500}
11501
11502SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11503 SelectionDAG &DAG,
11504 bool IsVP) const {
11505 SDLoc DL(Op);
11506 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
11507 MVT VecVT = Vec.getSimpleValueType();
11508 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11509 Op.getOpcode() == ISD::VECREDUCE_OR ||
11510 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11511 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11512 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11513 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11514 "Unexpected reduction lowering");
11515
11516 MVT XLenVT = Subtarget.getXLenVT();
11517
11518 MVT ContainerVT = VecVT;
11519 if (VecVT.isFixedLengthVector()) {
11520 ContainerVT = getContainerForFixedLengthVector(VecVT);
11521 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11522 }
11523
11524 SDValue Mask, VL;
11525 if (IsVP) {
11526 Mask = Op.getOperand(2);
11527 VL = Op.getOperand(3);
11528 } else {
11529 std::tie(Mask, VL) =
11530 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11531 }
11532
11533 ISD::CondCode CC;
11534 switch (Op.getOpcode()) {
11535 default:
11536 llvm_unreachable("Unhandled reduction");
11537 case ISD::VECREDUCE_AND:
11538 case ISD::VP_REDUCE_AND: {
11539 // vcpop ~x == 0
11540 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11541 if (IsVP || VecVT.isFixedLengthVector())
11542 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
11543 else
11544 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
11545 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11546 CC = ISD::SETEQ;
11547 break;
11548 }
11549 case ISD::VECREDUCE_OR:
11550 case ISD::VP_REDUCE_OR:
11551 // vcpop x != 0
11552 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11553 CC = ISD::SETNE;
11554 break;
11555 case ISD::VECREDUCE_XOR:
11556 case ISD::VP_REDUCE_XOR: {
11557 // ((vcpop x) & 1) != 0
11558 SDValue One = DAG.getConstant(1, DL, XLenVT);
11559 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11560 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
11561 CC = ISD::SETNE;
11562 break;
11563 }
11564 }
11565
11566 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11567 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
11568 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
11569
11570 if (!IsVP)
11571 return SetCC;
11572
11573 // Now include the start value in the operation.
11574 // Note that we must return the start value when no elements are operated
11575 // upon. The vcpop instructions we've emitted in each case above will return
11576 // 0 for an inactive vector, and so we've already received the neutral value:
11577 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11578 // can simply include the start value.
11579 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11580 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
11581}
11582
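// An AVL given as register X0 encodes VLMAX, which is never zero for a legal
// vector type, so it can be treated as a provably non-zero AVL.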
11583static bool isNonZeroAVL(SDValue AVL) {
11584 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
11585 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
11586 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11587 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11588}
11589
11590/// Helper to lower a reduction sequence of the form:
11591/// scalar = reduce_op vec, scalar_start
11592static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11593 SDValue StartValue, SDValue Vec, SDValue Mask,
11594 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
11595 const RISCVSubtarget &Subtarget) {
11596 const MVT VecVT = Vec.getSimpleValueType();
11597 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
11598 const MVT XLenVT = Subtarget.getXLenVT();
11599 const bool NonZeroAVL = isNonZeroAVL(VL);
11600
11601 // The reduction needs an LMUL1 input; do the splat at either LMUL1
11602 // or the original VT if fractional.
11603 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
11604 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
11605 // prove it is non-zero. For the AVL=0 case, we need the scalar to
11606 // be the result of the reduction operation.
11607 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
11608 SDValue InitialValue =
11609 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
11610 if (M1VT != InnerVT)
11611 InitialValue =
11612 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
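// If the AVL may be zero the reduction must produce the start value, so the
// splatted start value is also used as the passthru (tail) operand below.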
11613 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
11614 SDValue Policy = DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
11615 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
11616 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
11617 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
11618}
11619
11620SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
11621 SelectionDAG &DAG) const {
11622 SDLoc DL(Op);
11623 SDValue Vec = Op.getOperand(0);
11624 EVT VecEVT = Vec.getValueType();
11625
11626 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11627
11628 // Due to ordering in legalize types we may have a vector type that needs to
11629 // be split. Do that manually so we can get down to a legal type.
11630 while (getTypeAction(*DAG.getContext(), VecEVT) ==
11631 TargetLowering::TypeSplitVector) {
11632 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11633 VecEVT = Lo.getValueType();
11634 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
11635 }
11636
11637 // TODO: The type may need to be widened rather than split. Or widened before
11638 // it can be split.
11639 if (!isTypeLegal(VecEVT))
11640 return SDValue();
11641
11642 MVT VecVT = VecEVT.getSimpleVT();
11643 MVT VecEltVT = VecVT.getVectorElementType();
11644 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
11645
11646 MVT ContainerVT = VecVT;
11647 if (VecVT.isFixedLengthVector()) {
11648 ContainerVT = getContainerForFixedLengthVector(VecVT);
11649 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11650 }
11651
11652 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11653
11654 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
11655 switch (BaseOpc) {
11656 case ISD::AND:
11657 case ISD::OR:
11658 case ISD::UMAX:
11659 case ISD::UMIN:
11660 case ISD::SMAX:
11661 case ISD::SMIN:
11662 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
11663 }
11664 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
11665 Mask, VL, DL, DAG, Subtarget);
11666}
11667
11668// Given a reduction op, this function returns the matching reduction opcode,
11669// the vector SDValue and the scalar SDValue required to lower this to a
11670// RISCVISD node.
11671static std::tuple<unsigned, SDValue, SDValue>
11672getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
11673 const RISCVSubtarget &Subtarget) {
11674 SDLoc DL(Op);
11675 auto Flags = Op->getFlags();
11676 unsigned Opcode = Op.getOpcode();
11677 switch (Opcode) {
11678 default:
11679 llvm_unreachable("Unhandled reduction");
11680 case ISD::VECREDUCE_FADD: {
11681 // Use positive zero if we can. It is cheaper to materialize.
11682 SDValue Zero =
11683 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
11684 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
11685 }
11686 case ISD::VECREDUCE_SEQ_FADD:
11687 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
11688 Op.getOperand(0));
11689 case ISD::VECREDUCE_FMINIMUM:
11690 case ISD::VECREDUCE_FMAXIMUM:
11691 case ISD::VECREDUCE_FMIN:
11692 case ISD::VECREDUCE_FMAX: {
11693 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
11694 unsigned RVVOpc =
11695 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
11696 ? RISCVISD::VECREDUCE_FMIN_VL
11697 : RISCVISD::VECREDUCE_FMAX_VL;
11698 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
11699 }
11700 }
11701}
11702
11703SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
11704 SelectionDAG &DAG) const {
11705 SDLoc DL(Op);
11706 MVT VecEltVT = Op.getSimpleValueType();
11707
11708 unsigned RVVOpcode;
11709 SDValue VectorVal, ScalarVal;
11710 std::tie(RVVOpcode, VectorVal, ScalarVal) =
11711 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
11712 MVT VecVT = VectorVal.getSimpleValueType();
11713
11714 MVT ContainerVT = VecVT;
11715 if (VecVT.isFixedLengthVector()) {
11716 ContainerVT = getContainerForFixedLengthVector(VecVT);
11717 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
11718 }
11719
11720 MVT ResVT = Op.getSimpleValueType();
11721 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11722 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
11723 VL, DL, DAG, Subtarget);
11724 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
11725 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
11726 return Res;
11727
11728 if (Op->getFlags().hasNoNaNs())
11729 return Res;
11730
11731 // Force output to NaN if any element is NaN.
11732 SDValue IsNan =
11733 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
11734 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
11735 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
11736 MVT XLenVT = Subtarget.getXLenVT();
11737 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
11738 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
11739 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11740 return DAG.getSelect(
11741 DL, ResVT, NoNaNs, Res,
11742 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11743}
11744
11745SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
11746 SelectionDAG &DAG) const {
11747 SDLoc DL(Op);
11748 unsigned Opc = Op.getOpcode();
11749 SDValue Start = Op.getOperand(0);
11750 SDValue Vec = Op.getOperand(1);
11751 EVT VecEVT = Vec.getValueType();
11752 MVT XLenVT = Subtarget.getXLenVT();
11753
11754 // TODO: The type may need to be widened rather than split. Or widened before
11755 // it can be split.
11756 if (!isTypeLegal(VecEVT))
11757 return SDValue();
11758
11759 MVT VecVT = VecEVT.getSimpleVT();
11760 unsigned RVVOpcode = getRVVReductionOp(Opc);
11761
11762 if (VecVT.isFixedLengthVector()) {
11763 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
11764 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11765 }
11766
11767 SDValue VL = Op.getOperand(3);
11768 SDValue Mask = Op.getOperand(2);
11769 SDValue Res =
11770 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
11771 Vec, Mask, VL, DL, DAG, Subtarget);
11772 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
11773 Op->getFlags().hasNoNaNs())
11774 return Res;
11775
11776 // Propagate NaNs.
11777 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
11778 // Check if any of the elements in Vec is NaN.
11779 SDValue IsNaN = DAG.getNode(
11780 RISCVISD::SETCC_VL, DL, PredVT,
11781 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
11782 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
11783 // Check if the start value is NaN.
11784 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
11785 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
11786 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
11787 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
11788 MVT ResVT = Res.getSimpleValueType();
11789 return DAG.getSelect(
11790 DL, ResVT, NoNaNs, Res,
11791 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
11792}
11793
11794SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
11795 SelectionDAG &DAG) const {
11796 SDValue Vec = Op.getOperand(0);
11797 SDValue SubVec = Op.getOperand(1);
11798 MVT VecVT = Vec.getSimpleValueType();
11799 MVT SubVecVT = SubVec.getSimpleValueType();
11800
11801 SDLoc DL(Op);
11802 MVT XLenVT = Subtarget.getXLenVT();
11803 unsigned OrigIdx = Op.getConstantOperandVal(2);
11804 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
11805
11806 if (OrigIdx == 0 && Vec.isUndef())
11807 return Op;
11808
11809 // We don't have the ability to slide mask vectors up indexed by their i1
11810 // elements; the smallest we can do is i8. Often we are able to bitcast to
11811 // equivalent i8 vectors. Note that when inserting a fixed-length vector
11812 // into a scalable one, we might not necessarily have enough scalable
11813 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
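// For example, inserting nxv8i1 into nxv64i1 at index 8 can instead be done
// as inserting nxv1i8 into nxv8i8 at index 1.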
11814 if (SubVecVT.getVectorElementType() == MVT::i1) {
11815 if (VecVT.getVectorMinNumElements() >= 8 &&
11816 SubVecVT.getVectorMinNumElements() >= 8) {
11817 assert(OrigIdx % 8 == 0 && "Invalid index");
11818 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
11819 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
11820 "Unexpected mask vector lowering");
11821 OrigIdx /= 8;
11822 SubVecVT =
11823 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
11824 SubVecVT.isScalableVector());
11825 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
11826 VecVT.isScalableVector());
11827 Vec = DAG.getBitcast(VecVT, Vec);
11828 SubVec = DAG.getBitcast(SubVecVT, SubVec);
11829 } else {
11830 // We can't slide this mask vector up indexed by its i1 elements.
11831 // This poses a problem when we wish to insert a scalable vector which
11832 // can't be re-expressed as a larger type. Just choose the slow path and
11833 // extend to a larger type, then truncate back down.
11834 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
11835 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
11836 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
11837 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
11838 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
11839 Op.getOperand(2));
11840 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
11841 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
11842 }
11843 }
11844
11845 // If the subvector is a fixed-length type and we don't know VLEN
11846 // exactly, we cannot use subregister manipulation to simplify the codegen; we
11847 // don't know which register of a LMUL group contains the specific subvector
11848 // as we only know the minimum register size. Therefore we must slide the
11849 // vector group up the full amount.
11850 const auto VLen = Subtarget.getRealVLen();
11851 if (SubVecVT.isFixedLengthVector() && !VLen) {
11852 MVT ContainerVT = VecVT;
11853 if (VecVT.isFixedLengthVector()) {
11854 ContainerVT = getContainerForFixedLengthVector(VecVT);
11855 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11856 }
11857
11858 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
11859
11860 SDValue Mask =
11861 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
11862 // Set the vector length to only the number of elements we care about. Note
11863 // that for slideup this includes the offset.
11864 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
11865 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
11866
11867 // Use tail agnostic policy if we're inserting over Vec's tail.
11868 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11869 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
11870 Policy = RISCVVType::TAIL_AGNOSTIC;
11871
11872 // If we're inserting into the lowest elements, use a tail undisturbed
11873 // vmv.v.v.
11874 if (OrigIdx == 0) {
11875 SubVec =
11876 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
11877 } else {
11878 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
11879 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
11880 SlideupAmt, Mask, VL, Policy);
11881 }
11882
11883 if (VecVT.isFixedLengthVector())
11884 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
11885 return DAG.getBitcast(Op.getValueType(), SubVec);
11886 }
11887
11888 MVT ContainerVecVT = VecVT;
11889 if (VecVT.isFixedLengthVector()) {
11890 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
11891 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
11892 }
11893
11894 MVT ContainerSubVecVT = SubVecVT;
11895 if (SubVecVT.isFixedLengthVector()) {
11896 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
11897 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
11898 }
11899
11900 unsigned SubRegIdx;
11901 ElementCount RemIdx;
11902 // insert_subvector scales the index by vscale if the subvector is scalable,
11903 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
11904 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
11905 if (SubVecVT.isFixedLengthVector()) {
11906 assert(VLen);
11907 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11908 auto Decompose =
11909 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11910 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
11911 SubRegIdx = Decompose.first;
11912 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
11913 (OrigIdx % Vscale));
11914 } else {
11915 auto Decompose =
11916 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
11917 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
11918 SubRegIdx = Decompose.first;
11919 RemIdx = ElementCount::getScalable(Decompose.second);
11920 }
11921
11922 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
11923 assert(isPowerOf2_64(
11924 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
11925 bool ExactlyVecRegSized =
11926 Subtarget.expandVScale(SubVecVT.getSizeInBits())
11927 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
11928
11929 // 1. If the Idx has been completely eliminated and this subvector's size is
11930 // a vector register or a multiple thereof, or the surrounding elements are
11931 // undef, then this is a subvector insert which naturally aligns to a vector
11932 // register. These can easily be handled using subregister manipulation.
11933 // 2. If the subvector isn't an exact multiple of a valid register group size,
11934 // then the insertion must preserve the undisturbed elements of the register.
11935 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
11936 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
11937 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
11938 // of that LMUL=1 type back into the larger vector (resolving to another
11939 // subregister operation). See below for how our VSLIDEUP works. We go via a
11940 // LMUL=1 type to avoid allocating a large register group to hold our
11941 // subvector.
11942 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
11943 if (SubVecVT.isFixedLengthVector()) {
11944 // We may get NoSubRegister if inserting at index 0 and the subvec
11945 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
11946 if (SubRegIdx == RISCV::NoSubRegister) {
11947 assert(OrigIdx == 0);
11948 return Op;
11949 }
11950
11951 // Use an insert_subvector that will resolve to an insert subreg.
11952 assert(VLen);
11953 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
11954 SDValue Insert =
11955 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
11956 if (VecVT.isFixedLengthVector())
11957 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
11958 return Insert;
11959 }
11960 return Op;
11961 }
11962
11963 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
11964 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
11965 // (in our case undisturbed). This means we can set up a subvector insertion
11966 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
11967 // size of the subvector.
11968 MVT InterSubVT = ContainerVecVT;
11969 SDValue AlignedExtract = Vec;
11970 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
11971 if (SubVecVT.isFixedLengthVector()) {
11972 assert(VLen);
11973 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
11974 }
11975 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
11976 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
11977 // Extract a subvector equal to the nearest full vector register type. This
11978 // should resolve to a EXTRACT_SUBREG instruction.
11979 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
11980 }
11981
11982 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
11983
11984 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
11985
11986 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
11987 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
11988
11989 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
11990 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
11991 if (Subtarget.expandVScale(EndIndex) ==
11992 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
11993 Policy = RISCVVType::TAIL_AGNOSTIC;
11994
11995 // If we're inserting into the lowest elements, use a tail undisturbed
11996 // vmv.v.v.
11997 if (RemIdx.isZero()) {
11998 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
11999 SubVec, VL);
12000 } else {
12001 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12002
12003 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
12004 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
12005
12006 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
12007 SlideupAmt, Mask, VL, Policy);
12008 }
12009
12010 // If required, insert this subvector back into the correct vector register.
12011 // This should resolve to an INSERT_SUBREG instruction.
12012 if (ContainerVecVT.bitsGT(InterSubVT))
12013 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
12014
12015 if (VecVT.isFixedLengthVector())
12016 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
12017
12018 // We might have bitcast from a mask type: cast back to the original type if
12019 // required.
12020 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
12021}
12022
12023SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
12024 SelectionDAG &DAG) const {
12025 SDValue Vec = Op.getOperand(0);
12026 MVT SubVecVT = Op.getSimpleValueType();
12027 MVT VecVT = Vec.getSimpleValueType();
12028
12029 SDLoc DL(Op);
12030 MVT XLenVT = Subtarget.getXLenVT();
12031 unsigned OrigIdx = Op.getConstantOperandVal(1);
12032 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
12033
12034 // With an index of 0 this is a cast-like subvector, which can be performed
12035 // with subregister operations.
12036 if (OrigIdx == 0)
12037 return Op;
12038
12039 // We don't have the ability to slide mask vectors down indexed by their i1
12040 // elements; the smallest we can do is i8. Often we are able to bitcast to
12041 // equivalent i8 vectors. Note that when extracting a fixed-length vector
12042 // from a scalable one, we might not necessarily have enough scalable
12043 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
12044 if (SubVecVT.getVectorElementType() == MVT::i1) {
12045 if (VecVT.getVectorMinNumElements() >= 8 &&
12046 SubVecVT.getVectorMinNumElements() >= 8) {
12047 assert(OrigIdx % 8 == 0 && "Invalid index");
12048 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
12049 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
12050 "Unexpected mask vector lowering");
12051 OrigIdx /= 8;
12052 SubVecVT =
12053 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
12054 SubVecVT.isScalableVector());
12055 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
12056 VecVT.isScalableVector());
12057 Vec = DAG.getBitcast(VecVT, Vec);
12058 } else {
12059 // We can't slide this mask vector down, indexed by its i1 elements.
12060 // This poses a problem when we wish to extract a scalable vector which
12061 // can't be re-expressed as a larger type. Just choose the slow path and
12062 // extend to a larger type, then truncate back down.
12063 // TODO: We could probably improve this when extracting certain fixed
12064 // from fixed, where we can extract as i8 and shift the correct element
12065 // right to reach the desired subvector?
12066 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
12067 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
12068 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
12069 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
12070 Op.getOperand(1));
12071 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
12072 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
12073 }
12074 }
12075
12076 const auto VLen = Subtarget.getRealVLen();
12077
12078 // If the subvector is a fixed-length type and we don't know VLEN
12079 // exactly, we cannot use subregister manipulation to simplify the codegen; we
12080 // don't know which register of a LMUL group contains the specific subvector
12081 // as we only know the minimum register size. Therefore we must slide the
12082 // vector group down the full amount.
12083 if (SubVecVT.isFixedLengthVector() && !VLen) {
12084 MVT ContainerVT = VecVT;
12085 if (VecVT.isFixedLengthVector()) {
12086 ContainerVT = getContainerForFixedLengthVector(VecVT);
12087 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12088 }
12089
12090 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
12091 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
12092 if (auto ShrunkVT =
12093 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
12094 ContainerVT = *ShrunkVT;
12095 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
12096 }
12097
12098 SDValue Mask =
12099 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12100 // Set the vector length to only the number of elements we care about. This
12101 // avoids sliding down elements we're going to discard straight away.
12102 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12103 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12104 SDValue Slidedown =
12105 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12106 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
12107 // Now we can use a cast-like subvector extract to get the result.
12108 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12109 return DAG.getBitcast(Op.getValueType(), Slidedown);
12110 }
12111
12112 if (VecVT.isFixedLengthVector()) {
12113 VecVT = getContainerForFixedLengthVector(VecVT);
12114 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
12115 }
12116
12117 MVT ContainerSubVecVT = SubVecVT;
12118 if (SubVecVT.isFixedLengthVector())
12119 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12120
12121 unsigned SubRegIdx;
12122 ElementCount RemIdx;
12123 // extract_subvector scales the index by vscale if the subvector is scalable,
12124 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12125 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12126 if (SubVecVT.isFixedLengthVector()) {
12127 assert(VLen);
12128 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12129 auto Decompose =
12130 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12131 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12132 SubRegIdx = Decompose.first;
12133 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12134 (OrigIdx % Vscale));
12135 } else {
12136 auto Decompose =
12137 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12138 VecVT, ContainerSubVecVT, OrigIdx, TRI);
12139 SubRegIdx = Decompose.first;
12140 RemIdx = ElementCount::getScalable(Decompose.second);
12141 }
12142
12143 // If the Idx has been completely eliminated then this is a subvector extract
12144 // which naturally aligns to a vector register. These can easily be handled
12145 // using subregister manipulation. We use an extract_subvector that will
12146 // resolve to an extract subreg.
12147 if (RemIdx.isZero()) {
12148 if (SubVecVT.isFixedLengthVector()) {
12149 assert(VLen);
12150 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12151 Vec =
12152 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12153 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12154 }
12155 return Op;
12156 }
12157
12158 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12159 // was > M1 then the index would need to be a multiple of VLMAX, and so would
12160 // divide exactly.
12161 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12162 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12163
12164 // If the vector type is an LMUL-group type, extract a subvector equal to the
12165 // nearest full vector register type.
12166 MVT InterSubVT = VecVT;
12167 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12168 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12169 // we should have successfully decomposed the extract into a subregister.
12170 // We use an extract_subvector that will resolve to a subreg extract.
12171 assert(SubRegIdx != RISCV::NoSubRegister);
12172 (void)SubRegIdx;
12173 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12174 if (SubVecVT.isFixedLengthVector()) {
12175 assert(VLen);
12176 Idx /= *VLen / RISCV::RVVBitsPerBlock;
12177 }
12178 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12179 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12180 }
12181
12182 // Slide this vector register down by the desired number of elements in order
12183 // to place the desired subvector starting at element 0.
12184 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12185 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12186 if (SubVecVT.isFixedLengthVector())
12187 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12188 SDValue Slidedown =
12189 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12190 Vec, SlidedownAmt, Mask, VL);
12191
12192 // Now the vector is in the right position, extract our final subvector. This
12193 // should resolve to a COPY.
12194 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12195
12196 // We might have bitcast from a mask type: cast back to the original type if
12197 // required.
12198 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12199}
12200
12201// Widen a vector's operands to i8, then truncate its results back to the
12202// original type, typically i1. All operand and result types must be the same.
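// For example, an i1 VECTOR_DEINTERLEAVE is handled by zero-extending each i1
// operand to i8, deinterleaving the i8 vectors, and then comparing each i8
// result against zero (setne) to recover the i1 results.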
12203 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
12204 SelectionDAG &DAG) {
12205 MVT VT = N.getSimpleValueType();
12206 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12207 SmallVector<SDValue, 4> WideOps;
12208 for (SDValue Op : N->ops()) {
12209 assert(Op.getSimpleValueType() == VT &&
12210 "Operands and result must be same type");
12211 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12212 }
12213
12214 unsigned NumVals = N->getNumValues();
12215
12216 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
12217 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
12218 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12219 SmallVector<SDValue, 4> TruncVals;
12220 for (unsigned I = 0; I < NumVals; I++) {
12221 TruncVals.push_back(
12222 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12223 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12224 }
12225
12226 if (TruncVals.size() > 1)
12227 return DAG.getMergeValues(TruncVals, DL);
12228 return TruncVals.front();
12229}
12230
12231SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12232 SelectionDAG &DAG) const {
12233 SDLoc DL(Op);
12234 MVT VecVT = Op.getSimpleValueType();
12235
12236 const unsigned Factor = Op->getNumValues();
12237 assert(Factor <= 8);
12238
12239 // 1 bit element vectors need to be widened to e8
12240 if (VecVT.getVectorElementType() == MVT::i1)
12241 return widenVectorOpsToi8(Op, DL, DAG);
12242
12243 // Convert to scalable vectors first.
12244 if (VecVT.isFixedLengthVector()) {
12245 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12246 SmallVector<SDValue, 8> Ops(Factor);
12247 for (unsigned i = 0U; i < Factor; ++i)
12248 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12249 Subtarget);
12250
12251 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12252 SDValue NewDeinterleave =
12253 DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs, Ops);
12254
12255 SmallVector<SDValue, 8> Res(Factor);
12256 for (unsigned i = 0U; i < Factor; ++i)
12257 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12258 DAG, Subtarget);
12259 return DAG.getMergeValues(Res, DL);
12260 }
12261
12262 // If concatenating would exceed LMUL=8, we need to split.
12263 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12264 (8 * RISCV::RVVBitsPerBlock)) {
12265 SmallVector<SDValue, 8> Ops(Factor * 2);
12266 for (unsigned i = 0; i != Factor; ++i) {
12267 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12268 Ops[i * 2] = OpLo;
12269 Ops[i * 2 + 1] = OpHi;
12270 }
12271
12272 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12273
12274 SDValue Lo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
12275 ArrayRef(Ops).slice(0, Factor));
12276 SDValue Hi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
12277 ArrayRef(Ops).slice(Factor, Factor));
12278
12279 SmallVector<SDValue, 8> Res(Factor);
12280 for (unsigned i = 0; i != Factor; ++i)
12281 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
12282 Hi.getValue(i));
12283
12284 return DAG.getMergeValues(Res, DL);
12285 }
12286
12287 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
12288 MVT VT = Op->getSimpleValueType(0);
12289 SDValue V1 = Op->getOperand(0);
12290 SDValue V2 = Op->getOperand(1);
12291
12292 // For fractional LMUL, check if we can use a higher LMUL
12293 // instruction to avoid a vslidedown.
12294 if (SDValue Src = foldConcatVector(V1, V2);
12295 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
12296 EVT NewVT = VT.getDoubleNumVectorElementsVT();
12297 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
12298 // Freeze the source so we can increase its use count.
12299 Src = DAG.getFreeze(Src);
12300 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
12301 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12302 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
12303 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12304 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
12305 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
12306 return DAG.getMergeValues({Even, Odd}, DL);
12307 }
12308
12309 // Freeze the sources so we can increase their use count.
12310 V1 = DAG.getFreeze(V1);
12311 V2 = DAG.getFreeze(V2);
12312 SDValue Even =
12313 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12314 SDValue Odd =
12315 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12316 return DAG.getMergeValues({Even, Odd}, DL);
12317 }
12318
12319 SmallVector<SDValue, 8> Ops(Op->op_values());
12320
12321 // Concatenate the vectors as one vector to deinterleave
12322 MVT ConcatVT =
12323 MVT::getVectorVT(VecVT.getVectorElementType(),
12324 VecVT.getVectorElementCount().multiplyCoefficientBy(
12325 PowerOf2Ceil(Factor)));
12326 if (Ops.size() < PowerOf2Ceil(Factor))
12327 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
12328 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
12329
12330 if (Factor == 2) {
12331 // We can deinterleave through vnsrl.wi if the element type is smaller than
12332 // ELEN
12333 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12334 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
12335 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
12336 return DAG.getMergeValues({Even, Odd}, DL);
12337 }
12338
12339 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
12340 // possible mask vector, then extract the required subvector. Doing this
12341 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
12342 // creation to be rematerialized during register allocation to reduce
12343 // register pressure if needed.
12344
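// Illustration: splatting the i8 value 0b01010101 and bitcasting it to a mask
// vector sets mask bits 0, 2, 4, ... (selecting the even elements), while
// splatting 0b10101010 sets bits 1, 3, 5, ... (the odd elements), since bit i
// of the mask corresponds to element i.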
12345 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
12346
12347 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
12348 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
12349 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
12350
12351 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
12352 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
12353 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
12354
12355 // vcompress the even and odd elements into two separate vectors
12356 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12357 EvenMask, DAG.getUNDEF(ConcatVT));
12358 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12359 OddMask, DAG.getUNDEF(ConcatVT));
12360
12361 // Extract the result half of the gather for even and odd
12362 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
12363 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
12364
12365 return DAG.getMergeValues({Even, Odd}, DL);
12366 }
12367
12368 // Store with a unit-stride store and load it back with a segmented load.
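// E.g. for Factor = 3 the stored concatenation is the interleaved stream
// x0 y0 z0 x1 y1 z1 ..., and vlseg3 reads it back as the three deinterleaved
// vectors (x0 x1 ...), (y0 y1 ...) and (z0 z1 ...).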
12369 MVT XLenVT = Subtarget.getXLenVT();
12370 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12371 SDValue Passthru = DAG.getUNDEF(ConcatVT);
12372
12373 // Allocate a stack slot.
12374 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12375 SDValue StackPtr =
12376 DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
12377 auto &MF = DAG.getMachineFunction();
12378 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12379 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12380
12381 SDValue StoreOps[] = {DAG.getEntryNode(),
12382 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
12383 Concat, StackPtr, VL};
12384
12385 SDValue Chain = DAG.getMemIntrinsicNode(
12386 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
12387 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12388 MachineMemOperand::MOStore);
12389
12390 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
12391 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
12392 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
12393 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
12394 Intrinsic::riscv_vlseg8_mask};
12395
12396 SDValue LoadOps[] = {
12397 Chain,
12398 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
12399 Passthru,
12400 StackPtr,
12401 Mask,
12402 VL,
12403 DAG.getTargetConstant(
12404 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
12405 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
12406
12407 unsigned Sz =
12408 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12409 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12410
12411 SDValue Load = DAG.getMemIntrinsicNode(
12412 ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
12413 LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12414 MachineMemOperand::MOLoad);
12415
12416 SmallVector<SDValue, 8> Res(Factor);
12417
12418 for (unsigned i = 0U; i < Factor; ++i)
12419 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
12420 DAG.getTargetConstant(i, DL, MVT::i32));
12421
12422 return DAG.getMergeValues(Res, DL);
12423}
12424
12425SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
12426 SelectionDAG &DAG) const {
12427 SDLoc DL(Op);
12428 MVT VecVT = Op.getSimpleValueType();
12429
12430 const unsigned Factor = Op.getNumOperands();
12431 assert(Factor <= 8);
12432
12433 // i1 vectors need to be widened to i8
12434 if (VecVT.getVectorElementType() == MVT::i1)
12435 return widenVectorOpsToi8(Op, DL, DAG);
12436
12437 // Convert to scalable vectors first.
12438 if (VecVT.isFixedLengthVector()) {
12439 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12440 SmallVector<SDValue, 8> Ops(Factor);
12441 for (unsigned i = 0U; i < Factor; ++i)
12442 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12443 Subtarget);
12444
12445 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12446 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
12447
12448 SmallVector<SDValue, 8> Res(Factor);
12449 for (unsigned i = 0U; i < Factor; ++i)
12450 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
12451 Subtarget);
12452 return DAG.getMergeValues(Res, DL);
12453 }
12454
12455 MVT XLenVT = Subtarget.getXLenVT();
12456 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12457
12458 // If the VT is larger than LMUL=8, we need to split and reassemble.
12459 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12460 (8 * RISCV::RVVBitsPerBlock)) {
12461 SmallVector<SDValue, 8> Ops(Factor * 2);
12462 for (unsigned i = 0; i != Factor; ++i) {
12463 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12464 Ops[i] = OpLo;
12465 Ops[i + Factor] = OpHi;
12466 }
12467
12468 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12469
12470 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12471 ArrayRef(Ops).take_front(Factor)),
12472 DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12473 ArrayRef(Ops).drop_front(Factor))};
12474
12475 SmallVector<SDValue, 8> Concats(Factor);
12476 for (unsigned i = 0; i != Factor; ++i) {
12477 unsigned IdxLo = 2 * i;
12478 unsigned IdxHi = 2 * i + 1;
12479 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
12480 Res[IdxLo / Factor].getValue(IdxLo % Factor),
12481 Res[IdxHi / Factor].getValue(IdxHi % Factor));
12482 }
12483
12484 return DAG.getMergeValues(Concats, DL);
12485 }
12486
12487 SDValue Interleaved;
12488
12489 // Spill to the stack using a segment store for simplicity.
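// E.g. for Factor = 3 vsseg3 writes the interleaved stream x0 y0 z0 x1 y1 z1
// ... to the stack, and the Factor contiguous loads below then read that
// stream back one result register's worth at a time.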
12490 if (Factor != 2) {
12491 EVT MemVT =
12492 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(),
12493 VecVT.getVectorElementCount() * Factor);
12494
12495 // Allocate a stack slot.
12496 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12497 SDValue StackPtr =
12498 DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12499 EVT PtrVT = StackPtr.getValueType();
12500 auto &MF = DAG.getMachineFunction();
12501 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12502 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12503
12504 static const Intrinsic::ID IntrIds[] = {
12505 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12506 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12507 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12508 Intrinsic::riscv_vsseg8_mask,
12509 };
12510
12511 unsigned Sz =
12512 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12513 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12514
12515 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12516 for (unsigned i = 0; i < Factor; i++)
12517 StoredVal =
12518 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12519 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
12520
12521 SDValue Ops[] = {DAG.getEntryNode(),
12522 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
12523 StoredVal,
12524 StackPtr,
12525 Mask,
12526 VL,
12527 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()),
12528 DL, XLenVT)};
12529
12530 SDValue Chain = DAG.getMemIntrinsicNode(
12531 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12532 VecVT.getVectorElementType(), PtrInfo, Alignment,
12533 MachineMemOperand::MOStore);
12534
12535 SmallVector<SDValue, 8> Loads(Factor);
12536
12537 SDValue Increment =
12538 DAG.getVScale(DL, PtrVT,
12539 APInt(PtrVT.getFixedSizeInBits(),
12540 VecVT.getStoreSize().getKnownMinValue()));
12541 for (unsigned i = 0; i != Factor; ++i) {
12542 if (i != 0)
12543 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
12544
12545 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12546 }
12547
12548 return DAG.getMergeValues(Loads, DL);
12549 }
12550
12551 // Use ri.vzip2{a,b} if available
12552 // TODO: Figure out the best lowering for the spread variants
12553 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
12554 !Op.getOperand(1).isUndef()) {
12555 // Freeze the sources so we can increase their use count.
12556 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
12557 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
12558 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12559 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12560 return DAG.getMergeValues({Lo, Hi}, DL);
12561 }
12562
12563 // If the element type is smaller than ELEN, then we can interleave with
12564 // vwaddu.vv and vwmaccu.vx
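// Roughly: vwaddu.vv computes zext(a) + zext(b), and vwmaccu.vx with the
// scalar 2^SEW - 1 then adds zext(b) * (2^SEW - 1), leaving a[i] in the low
// half and b[i] in the high half of each widened element; reinterpreted at the
// original SEW this is the interleave a0 b0 a1 b1 ...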
12565 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12566 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
12567 DAG, Subtarget);
12568 } else {
12569 // Otherwise, fall back to using vrgatherei16.vv
12570 MVT ConcatVT =
12571 MVT::getVectorVT(VecVT.getVectorElementType(),
12572 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
12573 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
12574 Op.getOperand(0), Op.getOperand(1));
12575
12576 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
12577
12578 // 0 1 2 3 4 5 6 7 ...
12579 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
12580
12581 // 1 1 1 1 1 1 1 1 ...
12582 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
12583
12584 // 1 0 1 0 1 0 1 0 ...
12585 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
12586 OddMask = DAG.getSetCC(
12587 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
12588 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
12589 ISD::SETNE);
12590
12591 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
12592
12593 // Build up the index vector for interleaving the concatenated vector
12594 // 0 0 1 1 2 2 3 3 ...
12595 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
12596 // 0 n 1 n+1 2 n+2 3 n+3 ...
12597 Idx =
12598 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
12599
12600 // Then perform the interleave
12601 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
12602 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
12603 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
12604 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
12605 }
12606
12607 // Extract the two halves from the interleaved result
12608 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
12609 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
12610 VecVT.getVectorMinNumElements());
12611
12612 return DAG.getMergeValues({Lo, Hi}, DL);
12613}
12614
12615// Lower step_vector to the vid instruction. Any non-identity step value must
12616 // be accounted for by manual expansion.
12617SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
12618 SelectionDAG &DAG) const {
12619 SDLoc DL(Op);
12620 MVT VT = Op.getSimpleValueType();
12621 assert(VT.isScalableVector() && "Expected scalable vector");
12622 MVT XLenVT = Subtarget.getXLenVT();
12623 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
12624 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
12625 uint64_t StepValImm = Op.getConstantOperandVal(0);
12626 if (StepValImm != 1) {
12627 if (isPowerOf2_64(StepValImm)) {
12628 SDValue StepVal =
12629 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
12630 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
12631 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
12632 } else {
12633 SDValue StepVal = lowerScalarSplat(
12634 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
12635 VL, VT, DL, DAG, Subtarget);
12636 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
12637 }
12638 }
12639 return StepVec;
12640}
12641
12642// Implement vector_reverse using vrgather.vv with indices determined by
12643// subtracting the id of each element from (VLMAX-1). This will convert
12644// the indices like so:
12645// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
12646// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12647SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
12648 SelectionDAG &DAG) const {
12649 SDLoc DL(Op);
12650 MVT VecVT = Op.getSimpleValueType();
12651 if (VecVT.getVectorElementType() == MVT::i1) {
12652 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
12653 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
12654 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
12655 return DAG.getSetCC(DL, VecVT, Op2,
12656 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
12657 }
12658
12659 MVT ContainerVT = VecVT;
12660 SDValue Vec = Op.getOperand(0);
12661 if (VecVT.isFixedLengthVector()) {
12662 ContainerVT = getContainerForFixedLengthVector(VecVT);
12663 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12664 }
12665
12666 MVT XLenVT = Subtarget.getXLenVT();
12667 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12668
12669 // On some uarchs vrgather.vv will read from every input register for each
12670 // output register, regardless of the indices. However to reverse a vector
12671 // each output register only needs to read from one register. So decompose it
12672 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
12673 // O(LMUL^2).
12674 //
12675 // vsetvli a1, zero, e64, m4, ta, ma
12676 // vrgatherei16.vv v12, v8, v16
12677 // ->
12678 // vsetvli a1, zero, e64, m1, ta, ma
12679 // vrgather.vv v15, v8, v16
12680 // vrgather.vv v14, v9, v16
12681 // vrgather.vv v13, v10, v16
12682 // vrgather.vv v12, v11, v16
12683 if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
12684 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
12685 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
12686 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
12687 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
12688 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
12689
12690 // Fixed length vectors might not fit exactly into their container, and so
12691 // leave a gap in the front of the vector after being reversed. Slide this
12692 // away.
12693 //
12694 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
12695 // 0 1 2 3 x x x x <- reverse
12696 // x x x x 0 1 2 3 <- vslidedown.vx
12697 if (VecVT.isFixedLengthVector()) {
12698 SDValue Offset = DAG.getNode(
12699 ISD::SUB, DL, XLenVT,
12700 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
12701 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
12702 Concat =
12703 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12704 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
12705 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
12706 }
12707 return Concat;
12708 }
12709
12710 unsigned EltSize = ContainerVT.getScalarSizeInBits();
12711 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
12712 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12713 unsigned MaxVLMAX =
12714 VecVT.isFixedLengthVector()
12715 ? VecVT.getVectorNumElements()
12716 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12717
12718 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12719 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
12720
12721 // If this is SEW=8 and VLMAX is potentially more than 256, we need
12722 // to use vrgatherei16.vv.
12723 if (MaxVLMAX > 256 && EltSize == 8) {
12724 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
12725 // Reverse each half, then reassemble them in reverse order.
12726 // NOTE: It's also possible that after splitting that VLMAX no longer
12727 // requires vrgatherei16.vv.
12728 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12729 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
12730 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
12731 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12732 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12733 // Reassemble the low and high pieces reversed.
12734 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Hi, Lo);
12735 }
12736
12737 // Just promote the int type to i16 which will double the LMUL.
12738 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
12739 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12740 }
12741
12742 // At LMUL > 1, do the index computation in 16 bits to reduce register
12743 // pressure.
12744 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
12745 IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
12746 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
12747 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12748 IntVT = IntVT.changeVectorElementType(MVT::i16);
12749 }
12750
12751 // Calculate VLMAX-1 for the desired SEW.
12752 SDValue VLMinus1 = DAG.getNode(
12753 ISD::SUB, DL, XLenVT,
12754 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
12755 DAG.getConstant(1, DL, XLenVT));
12756
12757 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
12758 bool IsRV32E64 =
12759 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
12760 SDValue SplatVL;
12761 if (!IsRV32E64)
12762 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
12763 else
12764 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
12765 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
12766
12767 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
12768 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
12769 DAG.getUNDEF(IntVT), Mask, VL);
12770
12771 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
12772 DAG.getUNDEF(ContainerVT), Mask, VL);
12773 if (VecVT.isFixedLengthVector())
12774 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
12775 return Gather;
12776}
12777
12778SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
12779 SelectionDAG &DAG) const {
12780 SDLoc DL(Op);
12781 SDValue V1 = Op.getOperand(0);
12782 SDValue V2 = Op.getOperand(1);
12783 MVT XLenVT = Subtarget.getXLenVT();
12784 MVT VecVT = Op.getSimpleValueType();
12785
12786 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
12787
12788 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
12789 SDValue DownOffset, UpOffset;
12790 if (ImmValue >= 0) {
12791 // The operand is a TargetConstant, so we need to rebuild it as a regular
12792 // constant.
12793 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12794 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
12795 } else {
12796 // The operand is a TargetConstant, so we need to rebuild it as a regular
12797 // constant rather than negating the original operand.
12798 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12799 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
12800 }
12801
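// For example, with VLMAX = 8 and an offset of 3 (so DownOffset = 3 and
// UpOffset = 5):
//   V1 = a0 a1 a2 a3 a4 a5 a6 a7, V2 = b0 b1 b2 b3 b4 b5 b6 b7
//   vslidedown(V1, 3)          -> a3 a4 a5 a6 a7 _ _ _
//   vslideup(result, V2, 5)    -> a3 a4 a5 a6 a7 b0 b1 b2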
12802 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
12803
12804 SDValue SlideDown = getVSlidedown(
12805 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
12806 Subtarget.hasVLDependentLatency() ? UpOffset
12807 : DAG.getRegister(RISCV::X0, XLenVT));
12808 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
12809 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
12810 RISCVVType::TAIL_AGNOSTIC);
12811}
12812
12813SDValue
12814RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
12815 SelectionDAG &DAG) const {
12816 SDLoc DL(Op);
12817 auto *Load = cast<LoadSDNode>(Op);
12818
12819 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12820 Load->getMemoryVT(),
12821 *Load->getMemOperand()) &&
12822 "Expecting a correctly-aligned load");
12823
12824 MVT VT = Op.getSimpleValueType();
12825 MVT XLenVT = Subtarget.getXLenVT();
12826 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12827
12828 // If we know the exact VLEN and our fixed length vector completely fills
12829 // the container, use a whole register load instead.
12830 const auto [MinVLMAX, MaxVLMAX] =
12831 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12832 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12833 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12834 MachineMemOperand *MMO = Load->getMemOperand();
12835 SDValue NewLoad =
12836 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
12837 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
12838 MMO->getAAInfo(), MMO->getRanges());
12839 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12840 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12841 }
12842
12843 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12844
12845 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12846 SDValue IntID = DAG.getTargetConstant(
12847 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
12848 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
12849 if (!IsMaskOp)
12850 Ops.push_back(DAG.getUNDEF(ContainerVT));
12851 Ops.push_back(Load->getBasePtr());
12852 Ops.push_back(VL);
12853 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12854 SDValue NewLoad =
12855 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12856 Load->getMemoryVT(), Load->getMemOperand());
12857
12858 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
12859 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
12860}
12861
12862SDValue
12863RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
12864 SelectionDAG &DAG) const {
12865 SDLoc DL(Op);
12866 auto *Store = cast<StoreSDNode>(Op);
12867
12868 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
12869 Store->getMemoryVT(),
12870 *Store->getMemOperand()) &&
12871 "Expecting a correctly-aligned store");
12872
12873 SDValue StoreVal = Store->getValue();
12874 MVT VT = StoreVal.getSimpleValueType();
12875 MVT XLenVT = Subtarget.getXLenVT();
12876
12877 // If the size is less than a byte, we need to pad with zeros to make a byte.
12878 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
12879 VT = MVT::v8i1;
12880 StoreVal =
12881 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
12882 }
12883
12884 MVT ContainerVT = getContainerForFixedLengthVector(VT);
12885
12886 SDValue NewValue =
12887 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12888
12889 // If we know the exact VLEN and our fixed length vector completely fills
12890 // the container, use a whole register store instead.
12891 const auto [MinVLMAX, MaxVLMAX] =
12892 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
12893 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
12894 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
12895 MachineMemOperand *MMO = Store->getMemOperand();
12896 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
12897 MMO->getPointerInfo(), MMO->getBaseAlign(),
12898 MMO->getFlags(), MMO->getAAInfo());
12899 }
12900
12901 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
12902
12903 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
12904 SDValue IntID = DAG.getTargetConstant(
12905 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
12906 return DAG.getMemIntrinsicNode(
12907 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
12908 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
12909 Store->getMemoryVT(), Store->getMemOperand());
12910}
12911
12912SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
12913 SelectionDAG &DAG) const {
12914 SDLoc DL(Op);
12915 MVT VT = Op.getSimpleValueType();
12916
12917 const auto *MemSD = cast<MemSDNode>(Op);
12918 EVT MemVT = MemSD->getMemoryVT();
12919 MachineMemOperand *MMO = MemSD->getMemOperand();
12920 SDValue Chain = MemSD->getChain();
12921 SDValue BasePtr = MemSD->getBasePtr();
12922
12923 SDValue Mask, PassThru, VL;
12924 bool IsExpandingLoad = false;
12925 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
12926 Mask = VPLoad->getMask();
12927 PassThru = DAG.getUNDEF(VT);
12928 VL = VPLoad->getVectorLength();
12929 } else {
12930 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
12931 Mask = MLoad->getMask();
12932 PassThru = MLoad->getPassThru();
12933 IsExpandingLoad = MLoad->isExpandingLoad();
12934 }
12935
12936 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12937
12938 MVT XLenVT = Subtarget.getXLenVT();
12939
12940 MVT ContainerVT = VT;
12941 if (VT.isFixedLengthVector()) {
12942 ContainerVT = getContainerForFixedLengthVector(VT);
12943 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12944 if (!IsUnmasked) {
12945 MVT MaskVT = getMaskTypeFor(ContainerVT);
12946 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12947 }
12948 }
12949
12950 if (!VL)
12951 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12952
12953 SDValue ExpandingVL;
12954 if (!IsUnmasked && IsExpandingLoad) {
12955 ExpandingVL = VL;
12956 VL =
12957 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
12958 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
12959 }
12960
12961 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
12962 : Intrinsic::riscv_vle_mask;
12963 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12964 if (IntID == Intrinsic::riscv_vle)
12965 Ops.push_back(DAG.getUNDEF(ContainerVT));
12966 else
12967 Ops.push_back(PassThru);
12968 Ops.push_back(BasePtr);
12969 if (IntID == Intrinsic::riscv_vle_mask)
12970 Ops.push_back(Mask);
12971 Ops.push_back(VL);
12972 if (IntID == Intrinsic::riscv_vle_mask)
12973 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
12974
12975 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12976
12977 SDValue Result =
12978 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12979 Chain = Result.getValue(1);
12980 if (ExpandingVL) {
12981 MVT IndexVT = ContainerVT;
12982 if (ContainerVT.isFloatingPoint())
12983 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
12984
12985 MVT IndexEltVT = IndexVT.getVectorElementType();
12986 bool UseVRGATHEREI16 = false;
12987 // If the index vector is an i8 vector and the element count exceeds 256, we
12988 // should change the element type of the index vector to i16 to avoid
12989 // overflow.
12990 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
12991 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
12992 assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
12993 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
12994 UseVRGATHEREI16 = true;
12995 }
12996
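// viota assigns each lane the number of active lanes before it, so using it
// as the gather index spreads the densely loaded elements back out to their
// masked positions, e.g. mask 1 0 1 1 gives indices 0 _ 1 2 for the active
// lanes.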
12997 SDValue Iota =
12998 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
12999 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
13000 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
13001 Result =
13002 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
13003 : RISCVISD::VRGATHER_VV_VL,
13004 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
13005 }
13006
13007 if (VT.isFixedLengthVector())
13008 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13009
13010 return DAG.getMergeValues({Result, Chain}, DL);
13011}
13012
13013SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
13014 SDLoc DL(Op);
13015 MVT VT = Op->getSimpleValueType(0);
13016
13017 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
13018 EVT MemVT = VPLoadFF->getMemoryVT();
13019 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
13020 SDValue Chain = VPLoadFF->getChain();
13021 SDValue BasePtr = VPLoadFF->getBasePtr();
13022
13023 SDValue Mask = VPLoadFF->getMask();
13024 SDValue VL = VPLoadFF->getVectorLength();
13025
13026 MVT XLenVT = Subtarget.getXLenVT();
13027
13028 MVT ContainerVT = VT;
13029 if (VT.isFixedLengthVector()) {
13030 ContainerVT = getContainerForFixedLengthVector(VT);
13031 MVT MaskVT = getMaskTypeFor(ContainerVT);
13032 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13033 }
13034
13035 unsigned IntID = Intrinsic::riscv_vleff_mask;
13036 SDValue Ops[] = {
13037 Chain,
13038 DAG.getTargetConstant(IntID, DL, XLenVT),
13039 DAG.getUNDEF(ContainerVT),
13040 BasePtr,
13041 Mask,
13042 VL,
13043 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
13044
13045 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
13046
13047 SDValue Result =
13048 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
13049 SDValue OutVL = Result.getValue(1);
13050 Chain = Result.getValue(2);
13051
13052 if (VT.isFixedLengthVector())
13053 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13054
13055 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
13056}
13057
13058SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
13059 SelectionDAG &DAG) const {
13060 SDLoc DL(Op);
13061
13062 const auto *MemSD = cast<MemSDNode>(Op);
13063 EVT MemVT = MemSD->getMemoryVT();
13064 MachineMemOperand *MMO = MemSD->getMemOperand();
13065 SDValue Chain = MemSD->getChain();
13066 SDValue BasePtr = MemSD->getBasePtr();
13067 SDValue Val, Mask, VL;
13068
13069 bool IsCompressingStore = false;
13070 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
13071 Val = VPStore->getValue();
13072 Mask = VPStore->getMask();
13073 VL = VPStore->getVectorLength();
13074 } else {
13075 const auto *MStore = cast<MaskedStoreSDNode>(Op);
13076 Val = MStore->getValue();
13077 Mask = MStore->getMask();
13078 IsCompressingStore = MStore->isCompressingStore();
13079 }
13080
13081 bool IsUnmasked =
13082 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
13083
13084 MVT VT = Val.getSimpleValueType();
13085 MVT XLenVT = Subtarget.getXLenVT();
13086
13087 MVT ContainerVT = VT;
13088 if (VT.isFixedLengthVector()) {
13089 ContainerVT = getContainerForFixedLengthVector(VT);
13090
13091 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13092 if (!IsUnmasked || IsCompressingStore) {
13093 MVT MaskVT = getMaskTypeFor(ContainerVT);
13094 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13095 }
13096 }
13097
13098 if (!VL)
13099 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13100
13101 if (IsCompressingStore) {
13102 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13103 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13104 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
13105 VL =
13106 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
13107 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
13108 }
13109
13110 unsigned IntID =
13111 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
13112 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13113 Ops.push_back(Val);
13114 Ops.push_back(BasePtr);
13115 if (!IsUnmasked)
13116 Ops.push_back(Mask);
13117 Ops.push_back(VL);
13118
13119 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
13120 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13121}
13122
13123SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
13124 SelectionDAG &DAG) const {
13125 SDLoc DL(Op);
13126 SDValue Val = Op.getOperand(0);
13127 SDValue Mask = Op.getOperand(1);
13128 SDValue Passthru = Op.getOperand(2);
13129
13130 MVT VT = Val.getSimpleValueType();
13131 MVT XLenVT = Subtarget.getXLenVT();
13132 MVT ContainerVT = VT;
13133 if (VT.isFixedLengthVector()) {
13134 ContainerVT = getContainerForFixedLengthVector(VT);
13135 MVT MaskVT = getMaskTypeFor(ContainerVT);
13136 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13137 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13138 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13139 }
13140
13141 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13142 SDValue Res =
13143 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13144 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13145 Passthru, Val, Mask, VL);
13146
13147 if (VT.isFixedLengthVector())
13148 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13149
13150 return Res;
13151}
13152
13153SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
13154 SelectionDAG &DAG) const {
13155 unsigned Opc = Op.getOpcode();
13156 SDLoc DL(Op);
13157 SDValue Chain = Op.getOperand(0);
13158 SDValue Op1 = Op.getOperand(1);
13159 SDValue Op2 = Op.getOperand(2);
13160 SDValue CC = Op.getOperand(3);
13161 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
13162 MVT VT = Op.getSimpleValueType();
13163 MVT InVT = Op1.getSimpleValueType();
13164
13165 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
13166 // condition codes.
13167 if (Opc == ISD::STRICT_FSETCCS) {
13168 // Expand strict_fsetccs(x, oeq) to
13169 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
13170 SDVTList VTList = Op->getVTList();
13171 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
13172 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
13173 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13174 Op2, OLECCVal);
13175 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
13176 Op1, OLECCVal);
13177 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13178 Tmp1.getValue(1), Tmp2.getValue(1));
13179 // Tmp1 and Tmp2 might be the same node.
13180 if (Tmp1 != Tmp2)
13181 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
13182 return DAG.getMergeValues({Tmp1, OutChain}, DL);
13183 }
13184
13185 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
13186 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
13187 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
13188 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13189 Op2, OEQCCVal);
13190 SDValue Res = DAG.getNOT(DL, OEQ, VT);
13191 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
13192 }
13193 }
13194
13195 MVT ContainerInVT = InVT;
13196 if (InVT.isFixedLengthVector()) {
13197 ContainerInVT = getContainerForFixedLengthVector(InVT);
13198 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
13199 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
13200 }
13201 MVT MaskVT = getMaskTypeFor(ContainerInVT);
13202
13203 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
13204
13205 SDValue Res;
13206 if (Opc == ISD::STRICT_FSETCC &&
13207 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
13208 CCVal == ISD::SETOLE)) {
13209 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
13210 // is only active when both input elements are ordered.
13211 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
13212 SDValue OrderMask1 = DAG.getNode(
13213 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13214 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13215 True, VL});
13216 SDValue OrderMask2 = DAG.getNode(
13217 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13218 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13219 True, VL});
13220 Mask =
13221 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
13222 // Use Mask as the passthru operand to let the result be 0 if either of the
13223 // inputs is unordered.
13224 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
13225 DAG.getVTList(MaskVT, MVT::Other),
13226 {Chain, Op1, Op2, CC, Mask, Mask, VL});
13227 } else {
13228 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
13229 : RISCVISD::STRICT_FSETCCS_VL;
13230 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
13231 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
13232 }
13233
13234 if (VT.isFixedLengthVector()) {
13235 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
13236 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
13237 }
13238 return Res;
13239}
13240
13241// Lower vector ABS to smax(X, sub(0, X)).
13242SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13243 SDLoc DL(Op);
13244 MVT VT = Op.getSimpleValueType();
13245 SDValue X = Op.getOperand(0);
13246
13247 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13248 "Unexpected type for ISD::ABS");
13249
13250 MVT ContainerVT = VT;
13251 if (VT.isFixedLengthVector()) {
13252 ContainerVT = getContainerForFixedLengthVector(VT);
13253 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13254 }
13255
13256 SDValue Mask, VL;
13257 if (Op->getOpcode() == ISD::VP_ABS) {
13258 Mask = Op->getOperand(1);
13259 if (VT.isFixedLengthVector())
13260 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13261 Subtarget);
13262 VL = Op->getOperand(2);
13263 } else
13264 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13265
13266 SDValue SplatZero = DAG.getNode(
13267 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13268 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13269 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13270 DAG.getUNDEF(ContainerVT), Mask, VL);
13271 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13272 DAG.getUNDEF(ContainerVT), Mask, VL);
13273
13274 if (VT.isFixedLengthVector())
13275 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
13276 return Max;
13277}
13278
13279SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
13280 SelectionDAG &DAG) const {
13281 const auto &TSInfo =
13282 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13283
13284 unsigned NewOpc = getRISCVVLOp(Op);
13285 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
13286 bool HasMask = TSInfo.hasMaskOp(NewOpc);
13287
13288 MVT VT = Op.getSimpleValueType();
13289 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13290
13291 // Create list of operands by converting existing ones to scalable types.
13292 SmallVector<SDValue, 6> Ops;
13293 for (const SDValue &V : Op->op_values()) {
13294 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13295
13296 // Pass through non-vector operands.
13297 if (!V.getValueType().isVector()) {
13298 Ops.push_back(V);
13299 continue;
13300 }
13301
13302 // "cast" fixed length vector to a scalable vector.
13303 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
13304 "Only fixed length vectors are supported!");
13305 MVT VContainerVT = ContainerVT.changeVectorElementType(
13306 V.getSimpleValueType().getVectorElementType());
13307 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
13308 }
13309
13310 SDLoc DL(Op);
13311 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13312 if (HasPassthruOp)
13313 Ops.push_back(DAG.getUNDEF(ContainerVT));
13314 if (HasMask)
13315 Ops.push_back(Mask);
13316 Ops.push_back(VL);
13317
13318 // StrictFP operations have two result values. Their lowered result should
13319 // have the same result count.
13320 if (Op->isStrictFPOpcode()) {
13321 SDValue ScalableRes =
13322 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
13323 Op->getFlags());
13324 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13325 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
13326 }
13327
13328 SDValue ScalableRes =
13329 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
13330 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13331}
13332
13333// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
13334// * Operands of each node are assumed to be in the same order.
13335// * The EVL operand is promoted from i32 to i64 on RV64.
13336// * Fixed-length vectors are converted to their scalable-vector container
13337// types.
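// For example, a VP_ADD (x, y, mask, evl) is rewritten to RISCVISD::ADD_VL
// with the same operands plus an undef passthru inserted before the mask,
// with any fixed-length vectors first converted to their container types.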
13338SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
13339 const auto &TSInfo =
13340 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13341
13342 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13343 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
13344
13345 SDLoc DL(Op);
13346 MVT VT = Op.getSimpleValueType();
13347 SmallVector<SDValue, 16> Ops;
13348
13349 MVT ContainerVT = VT;
13350 if (VT.isFixedLengthVector())
13351 ContainerVT = getContainerForFixedLengthVector(VT);
13352
13353 for (const auto &OpIdx : enumerate(Op->ops())) {
13354 SDValue V = OpIdx.value();
13355 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13356 // Add dummy passthru value before the mask. Or if there isn't a mask,
13357 // before EVL.
13358 if (HasPassthruOp) {
13359 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
13360 if (MaskIdx) {
13361 if (*MaskIdx == OpIdx.index())
13362 Ops.push_back(DAG.getUNDEF(ContainerVT));
13363 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
13364 OpIdx.index()) {
13365 if (Op.getOpcode() == ISD::VP_MERGE) {
13366 // For VP_MERGE, copy the false operand instead of an undef value.
13367 Ops.push_back(Ops.back());
13368 } else {
13369 assert(Op.getOpcode() == ISD::VP_SELECT);
13370 // For VP_SELECT, add an undef value.
13371 Ops.push_back(DAG.getUNDEF(ContainerVT));
13372 }
13373 }
13374 }
13375 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
13376 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
13377 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
13378 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
13379 Subtarget.getXLenVT()));
13380 // Pass through operands which aren't fixed-length vectors.
13381 if (!V.getValueType().isFixedLengthVector()) {
13382 Ops.push_back(V);
13383 continue;
13384 }
13385 // "cast" fixed length vector to a scalable vector.
13386 MVT OpVT = V.getSimpleValueType();
13387 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
13388 assert(useRVVForFixedLengthVectorVT(OpVT) &&
13389 "Only fixed length vectors are supported!");
13390 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
13391 }
13392
13393 if (!VT.isFixedLengthVector())
13394 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
13395
13396 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
13397
13398 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
13399}
13400
13401SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
13402 SelectionDAG &DAG) const {
13403 SDLoc DL(Op);
13404 MVT VT = Op.getSimpleValueType();
13405
13406 SDValue Src = Op.getOperand(0);
13407 // NOTE: Mask is dropped.
13408 SDValue VL = Op.getOperand(2);
13409
13410 MVT ContainerVT = VT;
13411 if (VT.isFixedLengthVector()) {
13412 ContainerVT = getContainerForFixedLengthVector(VT);
13413 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
13414 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13415 }
13416
13417 MVT XLenVT = Subtarget.getXLenVT();
13418 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13419 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13420 DAG.getUNDEF(ContainerVT), Zero, VL);
13421
13422 SDValue SplatValue = DAG.getSignedConstant(
13423 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
13424 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13425 DAG.getUNDEF(ContainerVT), SplatValue, VL);
13426
13427 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
13428 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
13429 if (!VT.isFixedLengthVector())
13430 return Result;
13431 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13432}
13433
13434SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13435 SelectionDAG &DAG) const {
13436 SDLoc DL(Op);
13437 MVT VT = Op.getSimpleValueType();
13438
13439 SDValue Op1 = Op.getOperand(0);
13440 SDValue Op2 = Op.getOperand(1);
13441 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13442 // NOTE: Mask is dropped.
13443 SDValue VL = Op.getOperand(4);
13444
13445 MVT ContainerVT = VT;
13446 if (VT.isFixedLengthVector()) {
13447 ContainerVT = getContainerForFixedLengthVector(VT);
13448 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13449 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13450 }
13451
13452 SDValue Result;
13453 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13454
13455 switch (Condition) {
13456 default:
13457 break;
13458 // X != Y --> (X^Y)
13459 case ISD::SETNE:
13460 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13461 break;
13462 // X == Y --> ~(X^Y)
13463 case ISD::SETEQ: {
13464 SDValue Temp =
13465 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13466 Result =
13467 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
13468 break;
13469 }
13470 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13471 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13472 case ISD::SETGT:
13473 case ISD::SETULT: {
13474 SDValue Temp =
13475 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13476 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
13477 break;
13478 }
13479 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13480 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13481 case ISD::SETLT:
13482 case ISD::SETUGT: {
13483 SDValue Temp =
13484 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13485 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
13486 break;
13487 }
13488 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13489 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13490 case ISD::SETGE:
13491 case ISD::SETULE: {
13492 SDValue Temp =
13493 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13494 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
13495 break;
13496 }
13497 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13498 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13499 case ISD::SETLE:
13500 case ISD::SETUGE: {
13501 SDValue Temp =
13502 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13503 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
13504 break;
13505 }
13506 }
13507
13508 if (!VT.isFixedLengthVector())
13509 return Result;
13510 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13511}
13512
13513// Lower Floating-Point/Integer Type-Convert VP SDNodes
13514SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13515 SelectionDAG &DAG) const {
13516 SDLoc DL(Op);
13517
13518 SDValue Src = Op.getOperand(0);
13519 SDValue Mask = Op.getOperand(1);
13520 SDValue VL = Op.getOperand(2);
13521 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13522
13523 MVT DstVT = Op.getSimpleValueType();
13524 MVT SrcVT = Src.getSimpleValueType();
13525 if (DstVT.isFixedLengthVector()) {
13526 DstVT = getContainerForFixedLengthVector(DstVT);
13527 SrcVT = getContainerForFixedLengthVector(SrcVT);
13528 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13529 MVT MaskVT = getMaskTypeFor(DstVT);
13530 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13531 }
13532
13533 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13534 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13535
13537 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13538 if (SrcVT.isInteger()) {
13539 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13540
13541 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13542 ? RISCVISD::VSEXT_VL
13543 : RISCVISD::VZEXT_VL;
13544
13545 // Do we need to do any pre-widening before converting?
13546 if (SrcEltSize == 1) {
13547 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13548 MVT XLenVT = Subtarget.getXLenVT();
13549 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13550 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13551 DAG.getUNDEF(IntVT), Zero, VL);
13552 SDValue One = DAG.getSignedConstant(
13553 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
13554 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13555 DAG.getUNDEF(IntVT), One, VL);
13556 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
13557 ZeroSplat, DAG.getUNDEF(IntVT), VL);
13558 } else if (DstEltSize > (2 * SrcEltSize)) {
13559 // Widen before converting.
13560 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
13561 DstVT.getVectorElementCount());
13562 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
13563 }
13564
13565 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13566 } else {
13567 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13568 "Wrong input/output vector types");
13569
13570 // Convert f16 to f32 then convert f32 to i64.
13571 if (DstEltSize > (2 * SrcEltSize)) {
13572 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13573 MVT InterimFVT =
13574 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13575 Src =
13576 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
13577 }
13578
13579 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13580 }
13581 } else { // Narrowing + Conversion
13582 if (SrcVT.isInteger()) {
13583 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13584 // First do a narrowing convert to an FP type half the size, then round
13585 // the FP type to a small FP type if needed.
13586
13587 MVT InterimFVT = DstVT;
13588 if (SrcEltSize > (2 * DstEltSize)) {
13589 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13590 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13591 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13592 }
13593
13594 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
13595
13596 if (InterimFVT != DstVT) {
13597 Src = Result;
13598 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
13599 }
13600 } else {
13601 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13602 "Wrong input/output vector types");
13603 // First do a narrowing conversion to an integer half the size, then
13604 // truncate if needed.
13605
13606 if (DstEltSize == 1) {
13607 // First convert to the same size integer, then convert to mask using
13608 // setcc.
13609 assert(SrcEltSize >= 16 && "Unexpected FP type!");
13610 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
13611 DstVT.getVectorElementCount());
13612 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13613
13614 // Compare the integer result to 0. The integer should be 0 or 1/-1,
13615 // otherwise the conversion was undefined.
13616 MVT XLenVT = Subtarget.getXLenVT();
13617 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
13618 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
13619 DAG.getUNDEF(InterimIVT), SplatZero, VL);
13620 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
13621 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
13622 DAG.getUNDEF(DstVT), Mask, VL});
13623 } else {
13624 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13625 DstVT.getVectorElementCount());
13626
13627 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13628
13629 while (InterimIVT != DstVT) {
13630 SrcEltSize /= 2;
13631 Src = Result;
13632 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13633 DstVT.getVectorElementCount());
13634 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
13635 Src, Mask, VL);
13636 }
13637 }
13638 }
13639 }
13640
13641 MVT VT = Op.getSimpleValueType();
13642 if (!VT.isFixedLengthVector())
13643 return Result;
13644 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13645}
13646
13647SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
13648 SelectionDAG &DAG) const {
13649 SDLoc DL(Op);
13650 MVT VT = Op.getSimpleValueType();
13651 MVT XLenVT = Subtarget.getXLenVT();
13652
13653 SDValue Mask = Op.getOperand(0);
13654 SDValue TrueVal = Op.getOperand(1);
13655 SDValue FalseVal = Op.getOperand(2);
13656 SDValue VL = Op.getOperand(3);
13657
13658 // Use default legalization if a vector of EVL type would be legal.
13659 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
13660 VT.getVectorElementCount());
13661 if (isTypeLegal(EVLVecVT))
13662 return SDValue();
13663
13664 MVT ContainerVT = VT;
13665 if (VT.isFixedLengthVector()) {
13666 ContainerVT = getContainerForFixedLengthVector(VT);
13667 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
13668 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
13669 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
13670 }
13671
13672 // Promote to a vector of i8.
13673 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
13674
13675 // Promote TrueVal and FalseVal using VLMax.
13676 // FIXME: Is there a better way to do this?
13677 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
13678 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13679 DAG.getUNDEF(PromotedVT),
13680 DAG.getConstant(1, DL, XLenVT), VLMax);
13681 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13682 DAG.getUNDEF(PromotedVT),
13683 DAG.getConstant(0, DL, XLenVT), VLMax);
13684 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
13685 SplatZero, DAG.getUNDEF(PromotedVT), VL);
13686 // Any element past VL uses FalseVal, so use VLMax
13687 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
13688 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
13689
13690 // VP_MERGE the two promoted values.
13691 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
13692 TrueVal, FalseVal, FalseVal, VL);
13693
13694 // Convert back to mask.
13695 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13696 SDValue Result = DAG.getNode(
13697 RISCVISD::SETCC_VL, DL, ContainerVT,
13698 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
13699 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
13700
13701 if (VT.isFixedLengthVector())
13702 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13703 return Result;
13704}
13705
13706SDValue
13707RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
13708 SelectionDAG &DAG) const {
13709 using namespace SDPatternMatch;
13710
13711 SDLoc DL(Op);
13712
13713 SDValue Op1 = Op.getOperand(0);
13714 SDValue Op2 = Op.getOperand(1);
13715 SDValue Offset = Op.getOperand(2);
13716 SDValue Mask = Op.getOperand(3);
13717 SDValue EVL1 = Op.getOperand(4);
13718 SDValue EVL2 = Op.getOperand(5);
13719
13720 const MVT XLenVT = Subtarget.getXLenVT();
13721 MVT VT = Op.getSimpleValueType();
13722 MVT ContainerVT = VT;
13723 if (VT.isFixedLengthVector()) {
13724 ContainerVT = getContainerForFixedLengthVector(VT);
13725 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13726 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13727 MVT MaskVT = getMaskTypeFor(ContainerVT);
13728 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13729 }
13730
13731 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
13732 if (IsMaskVector) {
13733 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
13734
13735 // Expand input operands
13736 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13737 DAG.getUNDEF(ContainerVT),
13738 DAG.getConstant(1, DL, XLenVT), EVL1);
13739 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13740 DAG.getUNDEF(ContainerVT),
13741 DAG.getConstant(0, DL, XLenVT), EVL1);
13742 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
13743 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
13744
13745 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13746 DAG.getUNDEF(ContainerVT),
13747 DAG.getConstant(1, DL, XLenVT), EVL2);
13748 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13749 DAG.getUNDEF(ContainerVT),
13750 DAG.getConstant(0, DL, XLenVT), EVL2);
13751 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
13752 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
13753 }
13754
13755 auto getVectorFirstEle = [](SDValue Vec) {
13756 SDValue FirstEle;
13757 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
13758 return FirstEle;
13759
13760 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
13762 return Vec.getOperand(0);
13763
13764 return SDValue();
13765 };
13766
13767 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
13768 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
13769 MVT EltVT = ContainerVT.getVectorElementType();
13770 SDValue Result;
13771 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
13772 EltVT == MVT::bf16) {
13773 EltVT = EltVT.changeTypeToInteger();
13774 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
13775 Op2 = DAG.getBitcast(ContainerVT, Op2);
13776 FirstEle =
13777 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
13778 }
13779 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
13780 : RISCVISD::VSLIDE1UP_VL,
13781 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
13782 FirstEle, Mask, EVL2);
13783 Result = DAG.getBitcast(
13784 ContainerVT.changeVectorElementType(VT.getVectorElementType()),
13785 Result);
13786 return VT.isFixedLengthVector()
13787 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
13788 : Result;
13789 }
13790
13791 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
13792 SDValue DownOffset, UpOffset;
13793 if (ImmValue >= 0) {
13794 // The operand is a TargetConstant, we need to rebuild it as a regular
13795 // constant.
13796 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
13797 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
13798 } else {
13799 // The operand is a TargetConstant, we need to rebuild it as a regular
13800 // constant rather than negating the original operand.
13801 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
13802 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
13803 }
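// Illustrative example: for vp.splice(v1, v2, 2, mask, evl1, evl2), v1 is
// slid down by DownOffset = 2 and v2 is slid up by UpOffset = evl1 - 2.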
13804
13805 if (ImmValue != 0)
13806 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13807 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
13808 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
13809 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
13810 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
13811
13812 if (IsMaskVector) {
13813 // Truncate Result back to a mask vector (Result has same EVL as Op2)
13814 Result = DAG.getNode(
13815 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
13816 {Result, DAG.getConstant(0, DL, ContainerVT),
13817 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
13818 Mask, EVL2});
13819 }
13820
13821 if (!VT.isFixedLengthVector())
13822 return Result;
13823 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13824}
13825
13826SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
13827 SelectionDAG &DAG) const {
13828 SDLoc DL(Op);
13829 SDValue Val = Op.getOperand(0);
13830 SDValue Mask = Op.getOperand(1);
13831 SDValue VL = Op.getOperand(2);
13832 MVT VT = Op.getSimpleValueType();
13833
13834 MVT ContainerVT = VT;
13835 if (VT.isFixedLengthVector()) {
13836 ContainerVT = getContainerForFixedLengthVector(VT);
13837 MVT MaskVT = getMaskTypeFor(ContainerVT);
13838 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13839 }
13840
13841 SDValue Result;
13842 if (VT.getScalarType() == MVT::i1) {
13843 if (auto *C = dyn_cast<ConstantSDNode>(Val)) {
13844 Result =
13845 DAG.getNode(C->isZero() ? RISCVISD::VMCLR_VL : RISCVISD::VMSET_VL, DL,
13846 ContainerVT, VL);
13847 } else {
13848 MVT WidenVT = ContainerVT.changeVectorElementType(MVT::i8);
13849 SDValue LHS =
13850 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, WidenVT, DAG.getUNDEF(WidenVT),
13851 DAG.getZExtOrTrunc(Val, DL, Subtarget.getXLenVT()), VL);
13852 SDValue RHS = DAG.getConstant(0, DL, WidenVT);
13853 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13854 {LHS, RHS, DAG.getCondCode(ISD::SETNE),
13855 DAG.getUNDEF(ContainerVT), Mask, VL});
13856 }
13857 } else {
13858 Result =
13859 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
13860 }
13861
13862 if (!VT.isFixedLengthVector())
13863 return Result;
13864 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13865}
13866
13867SDValue
13868RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
13869 SelectionDAG &DAG) const {
13870 SDLoc DL(Op);
13871 MVT VT = Op.getSimpleValueType();
13872 MVT XLenVT = Subtarget.getXLenVT();
13873
13874 SDValue Op1 = Op.getOperand(0);
13875 SDValue Mask = Op.getOperand(1);
13876 SDValue EVL = Op.getOperand(2);
13877
13878 MVT ContainerVT = VT;
13879 if (VT.isFixedLengthVector()) {
13880 ContainerVT = getContainerForFixedLengthVector(VT);
13881 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13882 MVT MaskVT = getMaskTypeFor(ContainerVT);
13883 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13884 }
13885
13886 MVT GatherVT = ContainerVT;
13887 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
13888 // Check if we are working with mask vectors
13889 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
13890 if (IsMaskVector) {
13891 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
13892
13893 // Expand input operand
13894 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13895 DAG.getUNDEF(IndicesVT),
13896 DAG.getConstant(1, DL, XLenVT), EVL);
13897 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13898 DAG.getUNDEF(IndicesVT),
13899 DAG.getConstant(0, DL, XLenVT), EVL);
13900 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
13901 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
13902 }
13903
13904 unsigned EltSize = GatherVT.getScalarSizeInBits();
13905 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
13906 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13907 unsigned MaxVLMAX =
13908 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
13909
13910 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13911 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
13912 // to use vrgatherei16.vv.
13913 // TODO: It's also possible to use vrgatherei16.vv for other types to
13914 // decrease register width for the index calculation.
13915 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
13916 if (MaxVLMAX > 256 && EltSize == 8) {
13917 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
13918 // Split the vector in half and reverse each half using a full register
13919 // reverse.
13920 // Swap the halves and concatenate them.
13921 // Slide the concatenated result by (VLMax - VL).
13922 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13923 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
13924 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
13925
13926 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
13927 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
13928
13929 // Reassemble the low and high pieces reversed.
13930 // NOTE: this Result is unmasked (because we do not need masks for
13931 // shuffles). If in the future this has to change, we can use a SELECT_VL
13932 // between Result and UNDEF using the mask originally passed to VP_REVERSE
13933 SDValue Result =
13934 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
13935
13936 // Slide off any elements from past EVL that were reversed into the low
13937 // elements.
13938 unsigned MinElts = GatherVT.getVectorMinNumElements();
13939 SDValue VLMax =
13940 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
13941 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
13942
13943 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
13944 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
13945
13946 if (IsMaskVector) {
13947 // Truncate Result back to a mask vector
13948 Result =
13949 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
13950 {Result, DAG.getConstant(0, DL, GatherVT),
13951 DAG.getCondCode(ISD::SETNE),
13952 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13953 }
13954
13955 if (!VT.isFixedLengthVector())
13956 return Result;
13957 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13958 }
13959
13960 // Just promote the int type to i16 which will double the LMUL.
13961 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
13962 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13963 }
13964
13965 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
13966 SDValue VecLen =
13967 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
13968 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
13969 DAG.getUNDEF(IndicesVT), VecLen, EVL);
13970 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
13971 DAG.getUNDEF(IndicesVT), Mask, EVL);
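// Illustrative example: with EVL = 4 the indices computed above are
// (EVL - 1) - vid = {3, 2, 1, 0}, so the vrgather below reverses exactly the
// first EVL elements.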
13972 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
13973 DAG.getUNDEF(GatherVT), Mask, EVL);
13974
13975 if (IsMaskVector) {
13976 // Truncate Result back to a mask vector
13977 Result = DAG.getNode(
13978 RISCVISD::SETCC_VL, DL, ContainerVT,
13979 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
13980 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
13981 }
13982
13983 if (!VT.isFixedLengthVector())
13984 return Result;
13985 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13986}
13987
13988SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
13989 SelectionDAG &DAG) const {
13990 MVT VT = Op.getSimpleValueType();
13991 if (VT.getVectorElementType() != MVT::i1)
13992 return lowerVPOp(Op, DAG);
13993
13994 // It is safe to drop mask parameter as masked-off elements are undef.
13995 SDValue Op1 = Op->getOperand(0);
13996 SDValue Op2 = Op->getOperand(1);
13997 SDValue VL = Op->getOperand(3);
13998
13999 MVT ContainerVT = VT;
14000 const bool IsFixed = VT.isFixedLengthVector();
14001 if (IsFixed) {
14002 ContainerVT = getContainerForFixedLengthVector(VT);
14003 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14004 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
14005 }
14006
14007 SDLoc DL(Op);
14008 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
14009 if (!IsFixed)
14010 return Val;
14011 return convertFromScalableVector(VT, Val, DAG, Subtarget);
14012}
14013
14014SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
14015 SelectionDAG &DAG) const {
14016 SDLoc DL(Op);
14017 MVT XLenVT = Subtarget.getXLenVT();
14018 MVT VT = Op.getSimpleValueType();
14019 MVT ContainerVT = VT;
14020 if (VT.isFixedLengthVector())
14021 ContainerVT = getContainerForFixedLengthVector(VT);
14022
14023 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14024
14025 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
14026 // Check if the mask is known to be all ones
14027 SDValue Mask = VPNode->getMask();
14028 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14029
14030 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
14031 : Intrinsic::riscv_vlse_mask,
14032 DL, XLenVT);
14033 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
14034 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
14035 VPNode->getStride()};
14036 if (!IsUnmasked) {
14037 if (VT.isFixedLengthVector()) {
14038 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14039 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14040 }
14041 Ops.push_back(Mask);
14042 }
14043 Ops.push_back(VPNode->getVectorLength());
14044 if (!IsUnmasked) {
14045 SDValue Policy =
14046 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
14047 Ops.push_back(Policy);
14048 }
14049
14050 SDValue Result =
14051 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
14052 VPNode->getMemoryVT(), VPNode->getMemOperand());
14053 SDValue Chain = Result.getValue(1);
14054
14055 if (VT.isFixedLengthVector())
14056 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14057
14058 return DAG.getMergeValues({Result, Chain}, DL);
14059}
14060
14061SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
14062 SelectionDAG &DAG) const {
14063 SDLoc DL(Op);
14064 MVT XLenVT = Subtarget.getXLenVT();
14065
14066 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
14067 SDValue StoreVal = VPNode->getValue();
14068 MVT VT = StoreVal.getSimpleValueType();
14069 MVT ContainerVT = VT;
14070 if (VT.isFixedLengthVector()) {
14071 ContainerVT = getContainerForFixedLengthVector(VT);
14072 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
14073 }
14074
14075 // Check if the mask is known to be all ones
14076 SDValue Mask = VPNode->getMask();
14077 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14078
14079 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
14080 : Intrinsic::riscv_vsse_mask,
14081 DL, XLenVT);
14082 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
14083 VPNode->getBasePtr(), VPNode->getStride()};
14084 if (!IsUnmasked) {
14085 if (VT.isFixedLengthVector()) {
14086 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14087 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14088 }
14089 Ops.push_back(Mask);
14090 }
14091 Ops.push_back(VPNode->getVectorLength());
14092
14093 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
14094 Ops, VPNode->getMemoryVT(),
14095 VPNode->getMemOperand());
14096}
14097
14098// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
14099 // matched to an RVV indexed load. The RVV indexed load instructions only
14100// support the "unsigned unscaled" addressing mode; indices are implicitly
14101// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14102// signed or scaled indexing is extended to the XLEN value type and scaled
14103// accordingly.
14104SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
14105 SelectionDAG &DAG) const {
14106 SDLoc DL(Op);
14107 MVT VT = Op.getSimpleValueType();
14108
14109 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14110 EVT MemVT = MemSD->getMemoryVT();
14111 MachineMemOperand *MMO = MemSD->getMemOperand();
14112 SDValue Chain = MemSD->getChain();
14113 SDValue BasePtr = MemSD->getBasePtr();
14114
14115 [[maybe_unused]] ISD::LoadExtType LoadExtType;
14116 SDValue Index, Mask, PassThru, VL;
14117
14118 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
14119 Index = VPGN->getIndex();
14120 Mask = VPGN->getMask();
14121 PassThru = DAG.getUNDEF(VT);
14122 VL = VPGN->getVectorLength();
14123 // VP doesn't support extending loads.
14124 LoadExtType = ISD::NON_EXTLOAD;
14125 } else {
14126 // Else it must be a MGATHER.
14127 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14128 Index = MGN->getIndex();
14129 Mask = MGN->getMask();
14130 PassThru = MGN->getPassThru();
14131 LoadExtType = MGN->getExtensionType();
14132 }
14133
14134 MVT IndexVT = Index.getSimpleValueType();
14135 MVT XLenVT = Subtarget.getXLenVT();
14136
14138 "Unexpected VTs!");
14139 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14140 // Targets have to explicitly opt-in for extending vector loads.
14141 assert(LoadExtType == ISD::NON_EXTLOAD &&
14142 "Unexpected extending MGATHER/VP_GATHER");
14143
14144 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14145 // the selection of the masked intrinsics doesn't do this for us.
14146 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14147
14148 MVT ContainerVT = VT;
14149 if (VT.isFixedLengthVector()) {
14150 ContainerVT = getContainerForFixedLengthVector(VT);
14151 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14152 ContainerVT.getVectorElementCount());
14153
14154 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14155
14156 if (!IsUnmasked) {
14157 MVT MaskVT = getMaskTypeFor(ContainerVT);
14158 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14159 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14160 }
14161 }
14162
14163 if (!VL)
14164 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14165
14166 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14167 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14168 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14169 }
14170
14171 unsigned IntID =
14172 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
14173 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14174 if (IsUnmasked)
14175 Ops.push_back(DAG.getUNDEF(ContainerVT));
14176 else
14177 Ops.push_back(PassThru);
14178 Ops.push_back(BasePtr);
14179 Ops.push_back(Index);
14180 if (!IsUnmasked)
14181 Ops.push_back(Mask);
14182 Ops.push_back(VL);
14183 if (!IsUnmasked)
14184 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14185
14186 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14187 SDValue Result =
14188 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14189 Chain = Result.getValue(1);
14190
14191 if (VT.isFixedLengthVector())
14192 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14193
14194 return DAG.getMergeValues({Result, Chain}, DL);
14195}
14196
14197// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14198 // matched to an RVV indexed store. The RVV indexed store instructions only
14199// support the "unsigned unscaled" addressing mode; indices are implicitly
14200// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14201// signed or scaled indexing is extended to the XLEN value type and scaled
14202// accordingly.
14203SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14204 SelectionDAG &DAG) const {
14205 SDLoc DL(Op);
14206 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14207 EVT MemVT = MemSD->getMemoryVT();
14208 MachineMemOperand *MMO = MemSD->getMemOperand();
14209 SDValue Chain = MemSD->getChain();
14210 SDValue BasePtr = MemSD->getBasePtr();
14211
14212 [[maybe_unused]] bool IsTruncatingStore = false;
14213 SDValue Index, Mask, Val, VL;
14214
14215 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14216 Index = VPSN->getIndex();
14217 Mask = VPSN->getMask();
14218 Val = VPSN->getValue();
14219 VL = VPSN->getVectorLength();
14220 // VP doesn't support truncating stores.
14221 IsTruncatingStore = false;
14222 } else {
14223 // Else it must be a MSCATTER.
14224 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14225 Index = MSN->getIndex();
14226 Mask = MSN->getMask();
14227 Val = MSN->getValue();
14228 IsTruncatingStore = MSN->isTruncatingStore();
14229 }
14230
14231 MVT VT = Val.getSimpleValueType();
14232 MVT IndexVT = Index.getSimpleValueType();
14233 MVT XLenVT = Subtarget.getXLenVT();
14234
14236 "Unexpected VTs!");
14237 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14238 // Targets have to explicitly opt-in for extending vector loads and
14239 // truncating vector stores.
14240 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14241
14242 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14243 // the selection of the masked intrinsics doesn't do this for us.
14244 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14245
14246 MVT ContainerVT = VT;
14247 if (VT.isFixedLengthVector()) {
14248 ContainerVT = getContainerForFixedLengthVector(VT);
14249 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14250 ContainerVT.getVectorElementCount());
14251
14252 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14253 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14254
14255 if (!IsUnmasked) {
14256 MVT MaskVT = getMaskTypeFor(ContainerVT);
14257 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14258 }
14259 }
14260
14261 if (!VL)
14262 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14263
14264 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14265 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14266 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14267 }
14268
14269 unsigned IntID =
14270 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14271 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14272 Ops.push_back(Val);
14273 Ops.push_back(BasePtr);
14274 Ops.push_back(Index);
14275 if (!IsUnmasked)
14276 Ops.push_back(Mask);
14277 Ops.push_back(VL);
14278
14279 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
14280 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14281}
14282
14283SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14284 SelectionDAG &DAG) const {
14285 const MVT XLenVT = Subtarget.getXLenVT();
14286 SDLoc DL(Op);
14287 SDValue Chain = Op->getOperand(0);
14288 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14289 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14290 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14291
14292 // The encoding used for the rounding mode in RISC-V differs from that used
14293 // in FLT_ROUNDS. To convert between them, the RISC-V rounding mode is used as
14294 // an index into a table, which consists of a sequence of 4-bit fields, each
14295 // representing the corresponding FLT_ROUNDS mode.
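// Illustrative example: frm == 1 (RTZ) shifts the table right by 1*4 bits, so
// the masked result below is 0, the FLT_ROUNDS value for round-toward-zero.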
14296 static const int Table =
14297 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
14298 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
14299 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
14300 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
14301 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
14302
14303 SDValue Shift =
14304 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14305 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14306 DAG.getConstant(Table, DL, XLenVT), Shift);
14307 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14308 DAG.getConstant(7, DL, XLenVT));
14309
14310 return DAG.getMergeValues({Masked, Chain}, DL);
14311}
14312
14313SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14314 SelectionDAG &DAG) const {
14315 const MVT XLenVT = Subtarget.getXLenVT();
14316 SDLoc DL(Op);
14317 SDValue Chain = Op->getOperand(0);
14318 SDValue RMValue = Op->getOperand(1);
14319 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14320
14321 // The encoding used for the rounding mode in RISC-V differs from that used
14322 // in FLT_ROUNDS. To convert between them, the C rounding mode is used as an
14323 // index into a table, which consists of a sequence of 4-bit fields, each
14324 // representing the corresponding RISC-V mode.
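// Illustrative example: an FLT_ROUNDS value of 1 (round to nearest) selects
// the 4-bit field holding RISCVFPRndMode::RNE (0), which is written to frm.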
14325 static const unsigned Table =
14326 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
14327 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
14328 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
14329 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
14330 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
14331
14332 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
14333
14334 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
14335 DAG.getConstant(2, DL, XLenVT));
14336 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14337 DAG.getConstant(Table, DL, XLenVT), Shift);
14338 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14339 DAG.getConstant(0x7, DL, XLenVT));
14340 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14341 RMValue);
14342}
14343
14344SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
14345 SelectionDAG &DAG) const {
14346 const MVT XLenVT = Subtarget.getXLenVT();
14347 SDLoc DL(Op);
14348 SDValue Chain = Op->getOperand(0);
14349 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14350 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14351 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14352}
14353
14354SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
14355 SelectionDAG &DAG) const {
14356 const MVT XLenVT = Subtarget.getXLenVT();
14357 SDLoc DL(Op);
14358 SDValue Chain = Op->getOperand(0);
14359 SDValue EnvValue = Op->getOperand(1);
14360 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14361
14362 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14363 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14364 EnvValue);
14365}
14366
14367SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
14368 SelectionDAG &DAG) const {
14369 const MVT XLenVT = Subtarget.getXLenVT();
14370 SDLoc DL(Op);
14371 SDValue Chain = Op->getOperand(0);
14372 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
14373 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14374
14375 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14376 EnvValue);
14377}
14378
14381
14382SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
14383 SelectionDAG &DAG) const {
14384 const MVT XLenVT = Subtarget.getXLenVT();
14385 SDLoc DL(Op);
14386 SDValue Chain = Op->getOperand(0);
14387 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14388 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14389 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14390 Chain = Result.getValue(1);
14391 return DAG.getMergeValues({Result, Chain}, DL);
14392}
14393
14394SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
14395 SelectionDAG &DAG) const {
14396 const MVT XLenVT = Subtarget.getXLenVT();
14397 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14398 SDLoc DL(Op);
14399 SDValue Chain = Op->getOperand(0);
14400 SDValue EnvValue = Op->getOperand(1);
14401 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14402 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14403
14404 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14405 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
14406 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14407 ModeMask);
14408 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
14409 EnvValue);
14410}
14411
14412SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
14413 SelectionDAG &DAG) const {
14414 const MVT XLenVT = Subtarget.getXLenVT();
14415 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14416 SDLoc DL(Op);
14417 SDValue Chain = Op->getOperand(0);
14418 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14419 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14420
14421 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14422 ModeMask);
14423}
14424
14425SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
14426 SelectionDAG &DAG) const {
14427 MachineFunction &MF = DAG.getMachineFunction();
14428
14429 bool isRISCV64 = Subtarget.is64Bit();
14430 EVT PtrVT = getPointerTy(DAG.getDataLayout());
14431
14432 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
14433 return DAG.getFrameIndex(FI, PtrVT);
14434}
14435
14436// Returns the opcode of the target-specific SDNode that implements the 32-bit
14437// form of the given Opcode.
14438static unsigned getRISCVWOpcode(unsigned Opcode) {
14439 switch (Opcode) {
14440 default:
14441 llvm_unreachable("Unexpected opcode");
14442 case ISD::SHL:
14443 return RISCVISD::SLLW;
14444 case ISD::SRA:
14445 return RISCVISD::SRAW;
14446 case ISD::SRL:
14447 return RISCVISD::SRLW;
14448 case ISD::SDIV:
14449 return RISCVISD::DIVW;
14450 case ISD::UDIV:
14451 return RISCVISD::DIVUW;
14452 case ISD::UREM:
14453 return RISCVISD::REMUW;
14454 case ISD::ROTL:
14455 return RISCVISD::ROLW;
14456 case ISD::ROTR:
14457 return RISCVISD::RORW;
14458 }
14459}
14460
14461 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
14462 // node. Because i8/i16/i32 aren't legal types for RV64, these operations would
14463 // otherwise be promoted to i64, making it difficult to select the
14464 // SLLW/DIVUW/.../*W instructions later, because the fact that the operation
14465 // was originally of type i8/i16/i32 is lost.
14466 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
14467 unsigned ExtOpc = ISD::ANY_EXTEND) {
14468 SDLoc DL(N);
14469 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
14470 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
14471 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
14472 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
14473 // ReplaceNodeResults requires we maintain the same type for the return value.
14474 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
14475}
14476
14477// Converts the given 32-bit operation to a i64 operation with signed extension
14478// semantic to reduce the signed extension instructions.
14479 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
14480 SDLoc DL(N);
14481 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14482 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14483 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
14484 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14485 DAG.getValueType(MVT::i32));
14486 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
14487}
14488
14489 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
14490 SmallVectorImpl<SDValue> &Results,
14491 SelectionDAG &DAG) const {
14492 SDLoc DL(N);
14493 switch (N->getOpcode()) {
14494 default:
14495 llvm_unreachable("Don't know how to custom type legalize this operation!");
14496 case ISD::STRICT_FP_TO_SINT:
14497 case ISD::STRICT_FP_TO_UINT:
14498 case ISD::FP_TO_SINT:
14499 case ISD::FP_TO_UINT: {
14500 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14501 "Unexpected custom legalisation");
14502 bool IsStrict = N->isStrictFPOpcode();
14503 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14504 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14505 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
14506 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14507 TargetLowering::TypeSoftenFloat) {
14508 if (!isTypeLegal(Op0.getValueType()))
14509 return;
14510 if (IsStrict) {
14511 SDValue Chain = N->getOperand(0);
14512 // In absence of Zfh, promote f16 to f32, then convert.
14513 if (Op0.getValueType() == MVT::f16 &&
14514 !Subtarget.hasStdExtZfhOrZhinx()) {
14515 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
14516 {Chain, Op0});
14517 Chain = Op0.getValue(1);
14518 }
14519 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14520 : RISCVISD::STRICT_FCVT_WU_RV64;
14521 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14522 SDValue Res = DAG.getNode(
14523 Opc, DL, VTs, Chain, Op0,
14524 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14525 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14526 Results.push_back(Res.getValue(1));
14527 return;
14528 }
14529 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
14530 // convert.
14531 if ((Op0.getValueType() == MVT::f16 &&
14532 !Subtarget.hasStdExtZfhOrZhinx()) ||
14533 Op0.getValueType() == MVT::bf16)
14534 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14535
14536 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14537 SDValue Res =
14538 DAG.getNode(Opc, DL, MVT::i64, Op0,
14539 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14540 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14541 return;
14542 }
14543 // If the FP type needs to be softened, emit a library call using the 'si'
14544 // version. If we left it to default legalization we'd end up with 'di'. If
14545 // the FP type doesn't need to be softened just let generic type
14546 // legalization promote the result type.
14547 RTLIB::Libcall LC;
14548 if (IsSigned)
14549 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
14550 else
14551 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
14552 MakeLibCallOptions CallOptions;
14553 EVT OpVT = Op0.getValueType();
14554 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
14555 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
14556 SDValue Result;
14557 std::tie(Result, Chain) =
14558 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
14559 Results.push_back(Result);
14560 if (IsStrict)
14561 Results.push_back(Chain);
14562 break;
14563 }
14564 case ISD::LROUND: {
14565 SDValue Op0 = N->getOperand(0);
14566 EVT Op0VT = Op0.getValueType();
14567 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14568 TargetLowering::TypeSoftenFloat) {
14569 if (!isTypeLegal(Op0VT))
14570 return;
14571
14572 // In absence of Zfh, promote f16 to f32, then convert.
14573 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14574 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14575
14576 SDValue Res =
14577 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
14578 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
14579 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14580 return;
14581 }
14582 // If the FP type needs to be softened, emit a library call to lround. We'll
14583 // need to truncate the result. We assume any value that doesn't fit in i32
14584 // is allowed to return an unspecified value.
14585 RTLIB::Libcall LC =
14586 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14587 MakeLibCallOptions CallOptions;
14588 EVT OpVT = Op0.getValueType();
14589 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
14590 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
14591 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
14592 Results.push_back(Result);
14593 break;
14594 }
14595 case ISD::READCYCLECOUNTER:
14596 case ISD::READSTEADYCOUNTER: {
14597 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14598 "has custom type legalization on riscv32");
14599
14600 SDValue LoCounter, HiCounter;
14601 MVT XLenVT = Subtarget.getXLenVT();
14602 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14603 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
14604 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
14605 } else {
14606 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
14607 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
14608 }
14609 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
14610 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
14611 N->getOperand(0), LoCounter, HiCounter);
14612
14613 Results.push_back(
14614 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
14615 Results.push_back(RCW.getValue(2));
14616 break;
14617 }
14618 case ISD::LOAD: {
14619 if (!ISD::isNON_EXTLoad(N))
14620 return;
14621
14622 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14623 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14624 LoadSDNode *Ld = cast<LoadSDNode>(N);
14625
14626 if (N->getValueType(0) == MVT::i64) {
14627 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14628 "Unexpected custom legalisation");
14629
14630 if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
14631 return;
14632
14633 SDLoc DL(N);
14634 SDValue Result = DAG.getMemIntrinsicNode(
14635 RISCVISD::LD_RV32, DL,
14636 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
14637 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
14638 SDValue Lo = Result.getValue(0);
14639 SDValue Hi = Result.getValue(1);
14640 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
14641 Results.append({Pair, Result.getValue(2)});
14642 return;
14643 }
14644
14645 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14646 "Unexpected custom legalisation");
14647
14648 SDLoc dl(N);
14649 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
14650 Ld->getBasePtr(), Ld->getMemoryVT(),
14651 Ld->getMemOperand());
14652 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
14653 Results.push_back(Res.getValue(1));
14654 return;
14655 }
14656 case ISD::MUL: {
14657 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
14658 unsigned XLen = Subtarget.getXLen();
14659 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
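// Illustrative example: an i128 multiply on RV64 where one operand
// sign-extends from i64 and the other zero-extends is lowered below as
// lo = mul(s, u) and hi = mulhsu(s, u).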
14660 if (Size > XLen) {
14661 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
14662 SDValue LHS = N->getOperand(0);
14663 SDValue RHS = N->getOperand(1);
14664 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
14665
14666 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
14667 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
14668 // We need exactly one side to be unsigned.
14669 if (LHSIsU == RHSIsU)
14670 return;
14671
14672 auto MakeMULPair = [&](SDValue S, SDValue U) {
14673 MVT XLenVT = Subtarget.getXLenVT();
14674 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
14675 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
14676 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
14677 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
14678 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
14679 };
14680
14681 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
14682 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
14683
14684 // The other operand should be signed, but still prefer MULH when
14685 // possible.
14686 if (RHSIsU && LHSIsS && !RHSIsS)
14687 Results.push_back(MakeMULPair(LHS, RHS));
14688 else if (LHSIsU && RHSIsS && !LHSIsS)
14689 Results.push_back(MakeMULPair(RHS, LHS));
14690
14691 return;
14692 }
14693 [[fallthrough]];
14694 }
14695 case ISD::ADD:
14696 case ISD::SUB:
14697 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14698 "Unexpected custom legalisation");
14699 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
14700 break;
14701 case ISD::SHL:
14702 case ISD::SRA:
14703 case ISD::SRL:
14704 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14705 "Unexpected custom legalisation");
14706 if (N->getOperand(1).getOpcode() != ISD::Constant) {
14707 // If we can use a BSET instruction, allow default promotion to apply.
14708 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
14709 isOneConstant(N->getOperand(0)))
14710 break;
14711 Results.push_back(customLegalizeToWOp(N, DAG));
14712 break;
14713 }
14714
14715 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
14716 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
14717 // shift amount.
14718 if (N->getOpcode() == ISD::SHL) {
14719 SDLoc DL(N);
14720 SDValue NewOp0 =
14721 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14722 SDValue NewOp1 =
14723 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
14724 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
14725 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14726 DAG.getValueType(MVT::i32));
14727 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14728 }
14729
14730 break;
14731 case ISD::ROTL:
14732 case ISD::ROTR:
14733 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14734 "Unexpected custom legalisation");
14735 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
14736 Subtarget.hasVendorXTHeadBb()) &&
14737 "Unexpected custom legalization");
14738 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
14739 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
14740 return;
14741 Results.push_back(customLegalizeToWOp(N, DAG));
14742 break;
14743 case ISD::CTTZ:
14744 case ISD::CTTZ_ZERO_UNDEF:
14745 case ISD::CTLZ:
14746 case ISD::CTLZ_ZERO_UNDEF: {
14747 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14748 "Unexpected custom legalisation");
14749
14750 SDValue NewOp0 =
14751 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14752 bool IsCTZ =
14753 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
14754
14755 // Without Zbb, lower as 32 - clzw(~X & (X-1))
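// Worked example: X = 8 (0b1000) gives ~X & (X-1) = 0b0111, clzw = 29 and
// 32 - 29 = 3 == cttz(8); for X = 0 the AND is all ones, clzw = 0 and the
// result is 32, which is correct for CTTZ (CTTZ_ZERO_UNDEF is free to return
// anything for zero).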
14756 if (IsCTZ && !Subtarget.hasStdExtZbb()) {
14757 assert(Subtarget.hasStdExtP());
14758
14759 NewOp0 = DAG.getFreeze(NewOp0);
14760 SDValue Not = DAG.getNOT(DL, NewOp0, MVT::i64);
14761 SDValue Minus1 = DAG.getNode(ISD::SUB, DL, MVT::i64, NewOp0,
14762 DAG.getConstant(1, DL, MVT::i64));
14763 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Not, Minus1);
14764 SDValue CLZW = DAG.getNode(RISCVISD::CLZW, DL, MVT::i64, And);
14765 SDValue Sub = DAG.getNode(ISD::SUB, DL, MVT::i64,
14766 DAG.getConstant(32, DL, MVT::i64), CLZW);
14767 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Sub,
14768 DAG.getValueType(MVT::i32));
14769 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14770 return;
14771 }
14772
14773 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
14774 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
14775 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14776 return;
14777 }
14778 case ISD::SDIV:
14779 case ISD::UDIV:
14780 case ISD::UREM: {
14781 MVT VT = N->getSimpleValueType(0);
14782 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
14783 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
14784 "Unexpected custom legalisation");
14785 // Don't promote division/remainder by constant since we should expand those
14786 // to multiply by magic constant.
14787 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
14788 if (N->getOperand(1).getOpcode() == ISD::Constant &&
14789 !isIntDivCheap(N->getValueType(0), Attr))
14790 return;
14791
14792 // If the input is i32, use ANY_EXTEND since the W instructions don't read
14793 // the upper 32 bits. For other types we need to sign or zero extend
14794 // based on the opcode.
14795 unsigned ExtOpc = ISD::ANY_EXTEND;
14796 if (VT != MVT::i32)
14797 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
14798 : ISD::ZERO_EXTEND;
14799
14800 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
14801 break;
14802 }
14803 case ISD::SADDO: {
14804 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14805 "Unexpected custom legalisation");
14806
14807 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
14808 // use the default legalization.
14809 if (!isa<ConstantSDNode>(N->getOperand(1)))
14810 return;
14811
14812 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14813 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
14814 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
14815 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14816 DAG.getValueType(MVT::i32));
14817
14818 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14819
14820 // For an addition, the result should be less than one of the operands (LHS)
14821 // if and only if the other operand (RHS) is negative, otherwise there will
14822 // be overflow.
14823 // For a subtraction, the result should be less than one of the operands
14824 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
14825 // otherwise there will be overflow.
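// Illustrative example: saddo(0x7fffffff, 1) yields a sign-extended ADDW
// result of 0xffffffff80000000, which is (signed) less than LHS while RHS is
// non-negative, so the XOR below reports overflow.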
14826 EVT OType = N->getValueType(1);
14827 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
14828 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
14829
14830 SDValue Overflow =
14831 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
14832 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14833 Results.push_back(Overflow);
14834 return;
14835 }
14836 case ISD::UADDO:
14837 case ISD::USUBO: {
14838 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14839 "Unexpected custom legalisation");
14840 bool IsAdd = N->getOpcode() == ISD::UADDO;
14841 // Create an ADDW or SUBW.
14842 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14843 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14844 SDValue Res =
14845 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
14846 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
14847 DAG.getValueType(MVT::i32));
14848
14849 SDValue Overflow;
14850 if (IsAdd && isOneConstant(RHS)) {
14851 // Special case uaddo X, 1 overflowed if the addition result is 0.
14852 // The general case (X + C) < C is not necessarily beneficial. Although we
14853 // reduce the live range of X, we may introduce the materialization of
14854 // constant C, especially when the setcc result is used by a branch, since
14855 // RISC-V has no compare-with-constant-and-branch instruction.
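// Illustrative example: uaddo(0xffffffff, 1) wraps to 0, so "ADDW result
// == 0" is exactly the overflow condition for the +1 special case.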
14856 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
14857 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
14858 } else if (IsAdd && isAllOnesConstant(RHS)) {
14859 // Special case uaddo X, -1 overflowed if X != 0.
14860 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
14861 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
14862 } else {
14863 // Sign extend the LHS and perform an unsigned compare with the ADDW
14864 // result. Since the inputs are sign extended from i32, this is equivalent
14865 // to comparing the lower 32 bits.
14866 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
14867 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
14868 IsAdd ? ISD::SETULT : ISD::SETUGT);
14869 }
14870
14871 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14872 Results.push_back(Overflow);
14873 return;
14874 }
14875 case ISD::UADDSAT:
14876 case ISD::USUBSAT: {
14877 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14878 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
14879 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
14880 // promotion for UADDO/USUBO.
14881 Results.push_back(expandAddSubSat(N, DAG));
14882 return;
14883 }
14884 case ISD::SADDSAT:
14885 case ISD::SSUBSAT: {
14886 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14887 "Unexpected custom legalisation");
14888 Results.push_back(expandAddSubSat(N, DAG));
14889 return;
14890 }
14891 case ISD::ABS: {
14892 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14893 "Unexpected custom legalisation");
14894
14895 if (Subtarget.hasStdExtP()) {
14896 SDValue Src =
14897 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14898 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
14899 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
14900 return;
14901 }
14902
14903 if (Subtarget.hasStdExtZbb()) {
14904 // Emit a special node that will be expanded to NEGW+MAX at isel.
14905 // This allows us to remember that the result is sign extended. Expanding
14906 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
14907 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
14908 N->getOperand(0));
14909 SDValue Abs = DAG.getNode(RISCVISD::NEGW_MAX, DL, MVT::i64, Src);
14910 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
14911 return;
14912 }
14913
14914 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
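// Worked example: X = -5 gives Y = -1 (all sign bits), X ^ Y = 4 and
// 4 - (-1) = 5; for non-negative X, Y = 0 and the XOR/SUB leave X unchanged.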
14915 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14916
14917 // Freeze the source so we can increase its use count.
14918 Src = DAG.getFreeze(Src);
14919
14920 // Copy sign bit to all bits using the sraiw pattern.
14921 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
14922 DAG.getValueType(MVT::i32));
14923 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
14924 DAG.getConstant(31, DL, MVT::i64));
14925
14926 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
14927 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
14928
14929 // NOTE: The result is only required to be anyextended, but sext is
14930 // consistent with type legalization of sub.
14931 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
14932 DAG.getValueType(MVT::i32));
14933 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
14934 return;
14935 }
14936 case ISD::BITCAST: {
14937 EVT VT = N->getValueType(0);
14938 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
14939 SDValue Op0 = N->getOperand(0);
14940 EVT Op0VT = Op0.getValueType();
14941 MVT XLenVT = Subtarget.getXLenVT();
14942 if (VT == MVT::i16 &&
14943 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
14944 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
14945 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
14946 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
14947 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
14948 Subtarget.hasStdExtFOrZfinx()) {
14949 SDValue FPConv =
14950 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
14951 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
14952 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
14953 Subtarget.hasStdExtDOrZdinx()) {
14954 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
14955 DAG.getVTList(MVT::i32, MVT::i32), Op0);
14956 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
14957 NewReg.getValue(0), NewReg.getValue(1));
14958 Results.push_back(RetReg);
14959 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
14960 isTypeLegal(Op0VT)) {
14961 // Custom-legalize bitcasts from fixed-length vector types to illegal
14962 // scalar types in order to improve codegen. Bitcast the vector to a
14963 // one-element vector type whose element type is the same as the result
14964 // type, and extract the first element.
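// Illustrative example: on RV64, (i32 (bitcast v4i8 X)) becomes an extract of
// element 0 from (v1i32 (bitcast X)), provided v1i32 is a legal type.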
14965 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
14966 if (isTypeLegal(BVT)) {
14967 SDValue BVec = DAG.getBitcast(BVT, Op0);
14968 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
14969 }
14970 }
14971 break;
14972 }
14973 case ISD::BITREVERSE: {
14974 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
14975 "Unexpected custom legalisation");
14976 MVT XLenVT = Subtarget.getXLenVT();
14977 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14978 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
14979 // ReplaceNodeResults requires we maintain the same type for the return
14980 // value.
14981 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
14982 break;
14983 }
14984 case RISCVISD::BREV8:
14985 case RISCVISD::ORC_B: {
14986 MVT VT = N->getSimpleValueType(0);
14987 MVT XLenVT = Subtarget.getXLenVT();
14988 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
14989 "Unexpected custom legalisation");
14990 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
14991 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
14992 "Unexpected extension");
14993 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14994 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
14995 // ReplaceNodeResults requires we maintain the same type for the return
14996 // value.
14997 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
14998 break;
14999 }
15000 case ISD::EXTRACT_VECTOR_ELT: {
15001 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
15002 // type is illegal (currently only vXi64 RV32).
15003 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
15004 // transferred to the destination register. We issue two of these from the
15005 // upper- and lower- halves of the SEW-bit vector element, slid down to the
15006 // first element.
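// Illustrative example: extracting element 2 of a v4i64 vector on RV32 is
// lowered to a vslidedown by 2, a vmv.x.s for the low 32 bits, a 32-bit
// vector shift right plus a second vmv.x.s for the high 32 bits, and a
// BUILD_PAIR of the two halves.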
15007 SDValue Vec = N->getOperand(0);
15008 SDValue Idx = N->getOperand(1);
15009
15010 // The vector type hasn't been legalized yet so we can't issue target
15011 // specific nodes if it needs legalization.
15012 // FIXME: We would manually legalize if it's important.
15013 if (!isTypeLegal(Vec.getValueType()))
15014 return;
15015
15016 MVT VecVT = Vec.getSimpleValueType();
15017
15018 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
15019 VecVT.getVectorElementType() == MVT::i64 &&
15020 "Unexpected EXTRACT_VECTOR_ELT legalization");
15021
15022 // If this is a fixed vector, we need to convert it to a scalable vector.
15023 MVT ContainerVT = VecVT;
15024 if (VecVT.isFixedLengthVector()) {
15025 ContainerVT = getContainerForFixedLengthVector(VecVT);
15026 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
15027 }
15028
15029 MVT XLenVT = Subtarget.getXLenVT();
15030
15031 // Use a VL of 1 to avoid processing more elements than we need.
15032 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
15033
15034 // Unless the index is known to be 0, we must slide the vector down to get
15035 // the desired element into index 0.
15036 if (!isNullConstant(Idx)) {
15037 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
15038 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
15039 }
15040
15041 // Extract the lower XLEN bits of the correct vector element.
15042 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15043
15044 // To extract the upper XLEN bits of the vector element, shift the first
15045 // element right by 32 bits and re-extract the lower XLEN bits.
15046 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
15047 DAG.getUNDEF(ContainerVT),
15048 DAG.getConstant(32, DL, XLenVT), VL);
15049 SDValue LShr32 =
15050 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
15051 DAG.getUNDEF(ContainerVT), Mask, VL);
15052
15053 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15054
15055 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15056 break;
15057 }
15058 case ISD::INTRINSIC_WO_CHAIN: {
15059 unsigned IntNo = N->getConstantOperandVal(0);
15060 switch (IntNo) {
15061 default:
15062 llvm_unreachable(
15063 "Don't know how to custom type legalize this intrinsic!");
15064 case Intrinsic::experimental_get_vector_length: {
15065 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
15066 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15067 return;
15068 }
15069 case Intrinsic::experimental_cttz_elts: {
15070 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
15071 Results.push_back(
15072 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
15073 return;
15074 }
15075 case Intrinsic::riscv_orc_b:
15076 case Intrinsic::riscv_brev8:
15077 case Intrinsic::riscv_sha256sig0:
15078 case Intrinsic::riscv_sha256sig1:
15079 case Intrinsic::riscv_sha256sum0:
15080 case Intrinsic::riscv_sha256sum1:
15081 case Intrinsic::riscv_sm3p0:
15082 case Intrinsic::riscv_sm3p1: {
15083 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15084 return;
15085 unsigned Opc;
15086 switch (IntNo) {
15087 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
15088 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
15089 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
15090 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
15091 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
15092 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
15093 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
15094 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
15095 }
15096
15097 SDValue NewOp =
15098 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15099 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
15100 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15101 return;
15102 }
15103 case Intrinsic::riscv_sm4ks:
15104 case Intrinsic::riscv_sm4ed: {
15105 unsigned Opc =
15106 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
15107 SDValue NewOp0 =
15108 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15109 SDValue NewOp1 =
15110 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15111 SDValue Res =
15112 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
15113 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15114 return;
15115 }
15116 case Intrinsic::riscv_mopr: {
15117 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15118 return;
15119 SDValue NewOp =
15120 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15121 SDValue Res = DAG.getNode(
15122 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
15123 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
15124 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15125 return;
15126 }
15127 case Intrinsic::riscv_moprr: {
15128 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15129 return;
15130 SDValue NewOp0 =
15131 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15132 SDValue NewOp1 =
15133 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15134 SDValue Res = DAG.getNode(
15135 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
15136 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
15137 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15138 return;
15139 }
15140 case Intrinsic::riscv_clmul: {
15141 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15142 return;
15143
15144 SDValue NewOp0 =
15145 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15146 SDValue NewOp1 =
15147 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15148 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
15149 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15150 return;
15151 }
15152 case Intrinsic::riscv_clmulh:
15153 case Intrinsic::riscv_clmulr: {
15154 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15155 return;
15156
15157 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15158 // to the full 128-bit clmul result of multiplying two xlen values.
15159 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15160 // upper 32 bits.
15161 //
15162 // The alternative is to mask the inputs to 32 bits and use clmul, but
15163 // that requires two shifts to mask each input without zext.w.
15164 // FIXME: If the inputs are known zero extended or could be freely
15165 // zero extended, the mask form would be better.
15166 SDValue NewOp0 =
15167 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15168 SDValue NewOp1 =
15169 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15170 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15171 DAG.getConstant(32, DL, MVT::i64));
15172 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15173 DAG.getConstant(32, DL, MVT::i64));
15174 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
15175 : RISCVISD::CLMULR;
15176 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15177 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15178 DAG.getConstant(32, DL, MVT::i64));
15179 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15180 return;
15181 }
15182 case Intrinsic::riscv_vmv_x_s: {
15183 EVT VT = N->getValueType(0);
15184 MVT XLenVT = Subtarget.getXLenVT();
15185 if (VT.bitsLT(XLenVT)) {
15186 // Simple case just extract using vmv.x.s and truncate.
15187 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
15188 Subtarget.getXLenVT(), N->getOperand(1));
15189 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
15190 return;
15191 }
15192
15193 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
15194 "Unexpected custom legalization");
15195
15196 // We need to do the move in two steps.
15197 SDValue Vec = N->getOperand(1);
15198 MVT VecVT = Vec.getSimpleValueType();
15199
15200 // First extract the lower XLEN bits of the element.
15201 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15202
15203 // To extract the upper XLEN bits of the vector element, shift the first
15204 // element right by 32 bits and re-extract the lower XLEN bits.
15205 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
15206
15207 SDValue ThirtyTwoV =
15208 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
15209 DAG.getConstant(32, DL, XLenVT), VL);
15210 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
15211 DAG.getUNDEF(VecVT), Mask, VL);
15212 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15213
15214 Results.push_back(
15215 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15216 break;
15217 }
15218 }
15219 break;
15220 }
15221 case ISD::VECREDUCE_ADD:
15222 case ISD::VECREDUCE_AND:
15223 case ISD::VECREDUCE_OR:
15224 case ISD::VECREDUCE_XOR:
15225 case ISD::VECREDUCE_SMAX:
15226 case ISD::VECREDUCE_UMAX:
15227 case ISD::VECREDUCE_SMIN:
15228 case ISD::VECREDUCE_UMIN:
15229 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
15230 Results.push_back(V);
15231 break;
15232 case ISD::VP_REDUCE_ADD:
15233 case ISD::VP_REDUCE_AND:
15234 case ISD::VP_REDUCE_OR:
15235 case ISD::VP_REDUCE_XOR:
15236 case ISD::VP_REDUCE_SMAX:
15237 case ISD::VP_REDUCE_UMAX:
15238 case ISD::VP_REDUCE_SMIN:
15239 case ISD::VP_REDUCE_UMIN:
15240 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
15241 Results.push_back(V);
15242 break;
15243 case ISD::GET_ROUNDING: {
15244 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
15245 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
15246 Results.push_back(Res.getValue(0));
15247 Results.push_back(Res.getValue(1));
15248 break;
15249 }
15250 }
15251}
15252
15253/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
15254/// which corresponds to it.
15255static unsigned getVecReduceOpcode(unsigned Opc) {
15256 switch (Opc) {
15257 default:
15258 llvm_unreachable("Unhandled binary to transform reduction");
15259 case ISD::ADD:
15260 return ISD::VECREDUCE_ADD;
15261 case ISD::UMAX:
15262 return ISD::VECREDUCE_UMAX;
15263 case ISD::SMAX:
15264 return ISD::VECREDUCE_SMAX;
15265 case ISD::UMIN:
15266 return ISD::VECREDUCE_UMIN;
15267 case ISD::SMIN:
15268 return ISD::VECREDUCE_SMIN;
15269 case ISD::AND:
15270 return ISD::VECREDUCE_AND;
15271 case ISD::OR:
15272 return ISD::VECREDUCE_OR;
15273 case ISD::XOR:
15274 return ISD::VECREDUCE_XOR;
15275 case ISD::FADD:
15276 // Note: This is the associative form of the generic reduction opcode.
15277 return ISD::VECREDUCE_FADD;
15278 case ISD::FMAXNUM:
15279 return ISD::VECREDUCE_FMAX;
15280 case ISD::FMINNUM:
15281 return ISD::VECREDUCE_FMIN;
15282 }
15283}
15284
15285/// Perform two related transforms whose purpose is to incrementally recognize
15286/// an explode_vector followed by scalar reduction as a vector reduction node.
15287/// This exists to recover from a deficiency in SLP which can't handle
15288/// forests with multiple roots sharing common nodes. In some cases, one
15289/// of the trees will be vectorized, and the other will remain (unprofitably)
15290/// scalarized.
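/// For example (illustrative), (add (extract_elt V, 0), (extract_elt V, 1)) is
/// rewritten as a VECREDUCE_ADD over a two-element subvector of V, and a later
/// (add (that reduce), (extract_elt V, 2)) grows the reduced subvector by one
/// element.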
15291static SDValue
15292combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
15293 const RISCVSubtarget &Subtarget) {
15294
15295 // These transforms need to run before all integer types have been legalized
15296 // to i64 (so that the vector element type matches the add type), and while
15297 // it's safe to introduce odd sized vector types.
15298 if (DAG.NewNodesMustHaveLegalTypes)
15299 return SDValue();
15300
15301 // Without V, this transform isn't useful. We could form the (illegal)
15302 // operations and let them be scalarized again, but there's really no point.
15303 if (!Subtarget.hasVInstructions())
15304 return SDValue();
15305
15306 const SDLoc DL(N);
15307 const EVT VT = N->getValueType(0);
15308 const unsigned Opc = N->getOpcode();
15309
15310 if (!VT.isInteger()) {
15311 switch (Opc) {
15312 default:
15313 return SDValue();
15314 case ISD::FADD:
15315 // For FADD, we only handle the case with reassociation allowed. We
15316 // could handle strict reduction order, but at the moment, there's no
15317 // known reason to, and the complexity isn't worth it.
15318 if (!N->getFlags().hasAllowReassociation())
15319 return SDValue();
15320 break;
15321 case ISD::FMAXNUM:
15322 case ISD::FMINNUM:
15323 break;
15324 }
15325 }
15326
15327 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
15328 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
15329 "Inconsistent mappings");
15330 SDValue LHS = N->getOperand(0);
15331 SDValue RHS = N->getOperand(1);
15332
15333 if (!LHS.hasOneUse() || !RHS.hasOneUse())
15334 return SDValue();
15335
15336 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15337 std::swap(LHS, RHS);
15338
15339 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15340 !isa<ConstantSDNode>(RHS.getOperand(1)))
15341 return SDValue();
15342
15343 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
15344 SDValue SrcVec = RHS.getOperand(0);
15345 EVT SrcVecVT = SrcVec.getValueType();
15346 assert(SrcVecVT.getVectorElementType() == VT);
15347 if (SrcVecVT.isScalableVector())
15348 return SDValue();
15349
15350 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
15351 return SDValue();
15352
15353 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
15354 // reduce_op (extract_subvector [2 x VT] from V). This will form the
15355 // root of our reduction tree. TODO: We could extend this to any two
15356 // adjacent aligned constant indices if desired.
15357 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15358 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
15359 uint64_t LHSIdx =
15360 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
15361 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
15362 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
15363 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15364 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
15365 }
15366 }
15367
15368 // Match (binop (reduce (extract_subvector V, 0),
15369 // (extract_vector_elt V, sizeof(SubVec))))
15370 // into a reduction of one more element from the original vector V.
15371 if (LHS.getOpcode() != ReduceOpc)
15372 return SDValue();
15373
15374 SDValue ReduceVec = LHS.getOperand(0);
15375 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15376 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
15377 isNullConstant(ReduceVec.getOperand(1)) &&
15378 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
15379 // For illegal types (e.g. 3xi32), most will be combined again into a
15380 // wider (hopefully legal) type. If this is a terminal state, we are
15381 // relying on type legalization here to produce something reasonable
15382 // and this lowering quality could probably be improved. (TODO)
15383 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
15384 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15385 return DAG.getNode(ReduceOpc, DL, VT, Vec,
15386 ReduceVec->getFlags() & N->getFlags());
15387 }
15388
15389 return SDValue();
15390}
15391
15392
15393// Try to fold (<bop> x, (reduction.<bop> vec, start))
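// For example (illustrative): when the scalar start of the reduction is the
// neutral element (0 for add), (add X, (vecreduce.add vec, 0)) becomes
// (vecreduce.add vec, X), folding the outer scalar op into the start value.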
15394static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
15395 const RISCVSubtarget &Subtarget) {
15396 auto BinOpToRVVReduce = [](unsigned Opc) {
15397 switch (Opc) {
15398 default:
15399 llvm_unreachable("Unhandled binary to transform reduction");
15400 case ISD::ADD:
15401 return RISCVISD::VECREDUCE_ADD_VL;
15402 case ISD::UMAX:
15403 return RISCVISD::VECREDUCE_UMAX_VL;
15404 case ISD::SMAX:
15405 return RISCVISD::VECREDUCE_SMAX_VL;
15406 case ISD::UMIN:
15407 return RISCVISD::VECREDUCE_UMIN_VL;
15408 case ISD::SMIN:
15409 return RISCVISD::VECREDUCE_SMIN_VL;
15410 case ISD::AND:
15411 return RISCVISD::VECREDUCE_AND_VL;
15412 case ISD::OR:
15413 return RISCVISD::VECREDUCE_OR_VL;
15414 case ISD::XOR:
15415 return RISCVISD::VECREDUCE_XOR_VL;
15416 case ISD::FADD:
15417 return RISCVISD::VECREDUCE_FADD_VL;
15418 case ISD::FMAXNUM:
15419 return RISCVISD::VECREDUCE_FMAX_VL;
15420 case ISD::FMINNUM:
15421 return RISCVISD::VECREDUCE_FMIN_VL;
15422 }
15423 };
15424
15425 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
15426 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15427 isNullConstant(V.getOperand(1)) &&
15428 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
15429 };
15430
15431 unsigned Opc = N->getOpcode();
15432 unsigned ReduceIdx;
15433 if (IsReduction(N->getOperand(0), Opc))
15434 ReduceIdx = 0;
15435 else if (IsReduction(N->getOperand(1), Opc))
15436 ReduceIdx = 1;
15437 else
15438 return SDValue();
15439
15440 // Skip if FADD disallows reassociation but the combiner needs it.
15441 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
15442 return SDValue();
15443
15444 SDValue Extract = N->getOperand(ReduceIdx);
15445 SDValue Reduce = Extract.getOperand(0);
15446 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
15447 return SDValue();
15448
15449 SDValue ScalarV = Reduce.getOperand(2);
15450 EVT ScalarVT = ScalarV.getValueType();
15451 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
15452 ScalarV.getOperand(0)->isUndef() &&
15453 isNullConstant(ScalarV.getOperand(2)))
15454 ScalarV = ScalarV.getOperand(1);
15455
15456 // Make sure that ScalarV is a splat with VL=1.
15457 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
15458 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
15459 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
15460 return SDValue();
15461
15462 if (!isNonZeroAVL(ScalarV.getOperand(2)))
15463 return SDValue();
15464
15465 // Check the scalar of ScalarV is neutral element
15466 // TODO: Deal with value other than neutral element.
15467 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
15468 0))
15469 return SDValue();
15470
15471 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
15472 // FIXME: We might be able to improve this if operand 0 is undef.
15473 if (!isNonZeroAVL(Reduce.getOperand(5)))
15474 return SDValue();
15475
15476 SDValue NewStart = N->getOperand(1 - ReduceIdx);
15477
15478 SDLoc DL(N);
15479 SDValue NewScalarV =
15480 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
15481 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
15482
15483 // If we looked through an INSERT_SUBVECTOR we need to restore it.
15484 if (ScalarVT != ScalarV.getValueType())
15485 NewScalarV =
15486 DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);
15487
15488 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
15489 NewScalarV, Reduce.getOperand(3),
15490 Reduce.getOperand(4), Reduce.getOperand(5)};
15491 SDValue NewReduce =
15492 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
15493 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
15494 Extract.getOperand(1));
15495}
15496
15497// Optimize (add (shl x, c0), (shl y, c1)) ->
15498// (SLLI (SH*ADD x, y), c0), if c1-c0 is in [1|2|3],
15499// or
15500// (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <= 31.
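// For example (illustrative values): (add (shl x, 1), (shl y, 3)) becomes
// (shl (sh2add y, x), 1), since c1-c0 = 2 and min(c0, c1) = 1.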
15501static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
15502 const RISCVSubtarget &Subtarget) {
15503 // Perform this optimization only in the zba/xandesperf/xqciac/xtheadba
15504 // extension.
15505 if (!Subtarget.hasShlAdd(3))
15506 return SDValue();
15507
15508 // Skip for vector types and larger types.
15509 EVT VT = N->getValueType(0);
15510 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15511 return SDValue();
15512
15513 // The two operand nodes must be SHL and have no other use.
15514 SDValue N0 = N->getOperand(0);
15515 SDValue N1 = N->getOperand(1);
15516 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
15517 !N0->hasOneUse() || !N1->hasOneUse())
15518 return SDValue();
15519
15520 // Check c0 and c1.
15521 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15522 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
15523 if (!N0C || !N1C)
15524 return SDValue();
15525 int64_t C0 = N0C->getSExtValue();
15526 int64_t C1 = N1C->getSExtValue();
15527 if (C0 <= 0 || C1 <= 0)
15528 return SDValue();
15529
15530 int64_t Diff = std::abs(C0 - C1);
15531 if (!Subtarget.hasShlAdd(Diff))
15532 return SDValue();
15533
15534 // Build nodes.
15535 SDLoc DL(N);
15536 int64_t Bits = std::min(C0, C1);
15537 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
15538 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
15539 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
15540 DAG.getTargetConstant(Diff, DL, VT), NS);
15541 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
15542}
15543
15544// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
15545// or 3.
15546static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
15547 SelectionDAG &DAG) {
15548 using namespace llvm::SDPatternMatch;
15549
15550 // Looking for a reg-reg add and not an addi.
15551 if (isa<ConstantSDNode>(N->getOperand(1)))
15552 return SDValue();
15553
15554 // Based on testing it seems that performance degrades if the ADDI has
15555 // more than 2 uses.
15556 if (AddI->use_size() > 2)
15557 return SDValue();
15558
15559 APInt AddVal;
15560 SDValue SHLVal;
15561 if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
15562 return SDValue();
15563
15564 APInt VShift;
15565 if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
15566 return SDValue();
15567
15568 if (VShift.slt(1) || VShift.sgt(3))
15569 return SDValue();
15570
15571 SDLoc DL(N);
15572 EVT VT = N->getValueType(0);
15573 // The shift must be positive but the add can be signed.
15574 uint64_t ShlConst = VShift.getZExtValue();
15575 int64_t AddConst = AddVal.getSExtValue();
15576
15577 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
15578 DAG.getTargetConstant(ShlConst, DL, VT), Other);
15579 return DAG.getNode(ISD::ADD, DL, VT, SHADD,
15580 DAG.getSignedConstant(AddConst, DL, VT));
15581}
15582
15583// Optimize (add (add (shl x, c0), c1), y) ->
15584// (ADDI (SH*ADD y, x), c1), if c0 is in [1|2|3].
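// For example (illustrative values): (add (add (shl x, 2), 37), y) becomes
// (addi (sh2add x, y), 37).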
15585static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
15586 const RISCVSubtarget &Subtarget) {
15587 // Perform this optimization only in the zba extension.
15588 if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
15589 return SDValue();
15590
15591 // Skip for vector types and larger types.
15592 EVT VT = N->getValueType(0);
15593 if (VT != Subtarget.getXLenVT())
15594 return SDValue();
15595
15596 SDValue AddI = N->getOperand(0);
15597 SDValue Other = N->getOperand(1);
15598 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15599 return V;
15600 if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
15601 return V;
15602 return SDValue();
15603}
15604
15605// Combine a constant select operand into its use:
15606//
15607// (and (select cond, -1, c), x)
15608// -> (select cond, x, (and x, c)) [AllOnes=1]
15609// (or (select cond, 0, c), x)
15610// -> (select cond, x, (or x, c)) [AllOnes=0]
15611// (xor (select cond, 0, c), x)
15612// -> (select cond, x, (xor x, c)) [AllOnes=0]
15613// (add (select cond, 0, c), x)
15614// -> (select cond, x, (add x, c)) [AllOnes=0]
15615// (sub x, (select cond, 0, c))
15616// -> (select cond, x, (sub x, c)) [AllOnes=0]
15617static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
15618 SelectionDAG &DAG, bool AllOnes,
15619 const RISCVSubtarget &Subtarget) {
15620 EVT VT = N->getValueType(0);
15621
15622 // Skip vectors.
15623 if (VT.isVector())
15624 return SDValue();
15625
15626 if (!Subtarget.hasConditionalMoveFusion()) {
15627 // (select cond, x, (and x, c)) has custom lowering with Zicond.
15628 if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
15629 return SDValue();
15630
15631 // Maybe harmful when condition code has multiple use.
15632 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
15633 return SDValue();
15634
15635 // Maybe harmful when VT is wider than XLen.
15636 if (VT.getSizeInBits() > Subtarget.getXLen())
15637 return SDValue();
15638 }
15639
15640 if ((Slct.getOpcode() != ISD::SELECT &&
15641 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
15642 !Slct.hasOneUse())
15643 return SDValue();
15644
15645 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
15646 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
15647 };
15648
15649 bool SwapSelectOps;
15650 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
15651 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
15652 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
15653 SDValue NonConstantVal;
15654 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
15655 SwapSelectOps = false;
15656 NonConstantVal = FalseVal;
15657 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
15658 SwapSelectOps = true;
15659 NonConstantVal = TrueVal;
15660 } else
15661 return SDValue();
15662
15663 // Slct is now known to be the desired identity constant when CC is true.
15664 TrueVal = OtherOp;
15665 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
15666 // Unless SwapSelectOps says the condition should be false.
15667 if (SwapSelectOps)
15668 std::swap(TrueVal, FalseVal);
15669
15670 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
15671 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
15672 {Slct.getOperand(0), Slct.getOperand(1),
15673 Slct.getOperand(2), TrueVal, FalseVal});
15674
15675 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
15676 {Slct.getOperand(0), TrueVal, FalseVal});
15677}
15678
15679// Attempt combineSelectAndUse on each operand of a commutative operator N.
15680static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
15681 bool AllOnes,
15682 const RISCVSubtarget &Subtarget) {
15683 SDValue N0 = N->getOperand(0);
15684 SDValue N1 = N->getOperand(1);
15685 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
15686 return Result;
15687 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
15688 return Result;
15689 return SDValue();
15690}
15691
15692// Transform (add (mul x, c0), c1) ->
15693// (add (mul (add x, c1/c0), c0), c1%c0).
15694// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
15695// that should be excluded is when c0*(c1/c0) is simm12, which will lead
15696// to an infinite loop in DAGCombine if transformed.
15697// Or transform (add (mul x, c0), c1) ->
15698// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
15699// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
15700// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
15701// lead to an infinite loop in DAGCombine if transformed.
15702// Or transform (add (mul x, c0), c1) ->
15703// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
15704// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
15705// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
15706// lead to an infinite loop in DAGCombine if transformed.
15707// Or transform (add (mul x, c0), c1) ->
15708// (mul (add x, c1/c0), c0).
15709// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
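// For example (illustrative values): with c0 = 100 and c1 = 4097, c1/c0 = 40 and
// c1%c0 = 97 are both simm12 while 4097 is not, so (add (mul x, 100), 4097)
// becomes (add (mul (add x, 40), 100), 97).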
15710static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
15711 const RISCVSubtarget &Subtarget) {
15712 // Skip for vector types and larger types.
15713 EVT VT = N->getValueType(0);
15714 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15715 return SDValue();
15716 // The first operand node must be a MUL and has no other use.
15717 SDValue N0 = N->getOperand(0);
15718 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
15719 return SDValue();
15720 // Check if c0 and c1 match above conditions.
15721 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15722 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15723 if (!N0C || !N1C)
15724 return SDValue();
15725 // If N0C has multiple uses it's possible one of the cases in
15726 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
15727 // in an infinite loop.
15728 if (!N0C->hasOneUse())
15729 return SDValue();
15730 int64_t C0 = N0C->getSExtValue();
15731 int64_t C1 = N1C->getSExtValue();
15732 int64_t CA, CB;
15733 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
15734 return SDValue();
15735 // Search for proper CA (non-zero) and CB that both are simm12.
15736 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
15737 !isInt<12>(C0 * (C1 / C0))) {
15738 CA = C1 / C0;
15739 CB = C1 % C0;
15740 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
15741 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
15742 CA = C1 / C0 + 1;
15743 CB = C1 % C0 - C0;
15744 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
15745 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
15746 CA = C1 / C0 - 1;
15747 CB = C1 % C0 + C0;
15748 } else
15749 return SDValue();
15750 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
15751 SDLoc DL(N);
15752 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
15753 DAG.getSignedConstant(CA, DL, VT));
15754 SDValue New1 =
15755 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
15756 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
15757}
15758
15759// add (zext, zext) -> zext (add (zext, zext))
15760// sub (zext, zext) -> sext (sub (zext, zext))
15761// mul (zext, zext) -> zext (mul (zext, zext))
15762// sdiv (zext, zext) -> zext (sdiv (zext, zext))
15763// udiv (zext, zext) -> zext (udiv (zext, zext))
15764// srem (zext, zext) -> zext (srem (zext, zext))
15765// urem (zext, zext) -> zext (urem (zext, zext))
15766//
15767// where the sum of the extend widths matches, and the range of the bin op
15768// fits inside the width of the narrower bin op. (For profitability on rvv, we
15769// use a power of two for both inner and outer extend.)
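// For example (illustrative types): (add (zext v4i8 X to v4i64),
// (zext v4i8 Y to v4i64)) becomes
// (zext (add (zext X to v4i32), (zext Y to v4i32)) to v4i64).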
15770static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
15771
15772 EVT VT = N->getValueType(0);
15773 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
15774 return SDValue();
15775
15776 SDValue N0 = N->getOperand(0);
15777 SDValue N1 = N->getOperand(1);
15778 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
15779 return SDValue();
15780 if (!N0.hasOneUse() || !N1.hasOneUse())
15781 return SDValue();
15782
15783 SDValue Src0 = N0.getOperand(0);
15784 SDValue Src1 = N1.getOperand(0);
15785 EVT SrcVT = Src0.getValueType();
15786 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
15787 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
15788 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
15789 return SDValue();
15790
15791 LLVMContext &C = *DAG.getContext();
15792 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
15793 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
15794
15795 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
15796 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
15797
15798 // Src0 and Src1 are zero extended, so they're always positive if signed.
15799 //
15800 // sub can produce a negative from two positive operands, so it needs sign
15801 // extended. Other nodes produce a positive from two positive operands, so
15802 // zero extend instead.
15803 unsigned OuterExtend =
15804 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
15805
15806 return DAG.getNode(
15807 OuterExtend, SDLoc(N), VT,
15808 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
15809}
15810
15811// Try to turn (add (xor bool, 1) -1) into (neg bool).
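// Since bool is known to be 0 or 1, (xor bool, 1) - 1 evaluates to 0 - bool.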
15812static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
15813 SDValue N0 = N->getOperand(0);
15814 SDValue N1 = N->getOperand(1);
15815 EVT VT = N->getValueType(0);
15816 SDLoc DL(N);
15817
15818 // RHS should be -1.
15819 if (!isAllOnesConstant(N1))
15820 return SDValue();
15821
15822 // Look for (xor X, 1).
15823 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
15824 return SDValue();
15825
15826 // First xor input should be 0 or 1.
15827 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
15828 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
15829 return SDValue();
15830
15831 // Emit a negate of the setcc.
15832 return DAG.getNegative(N0.getOperand(0), DL, VT);
15833}
15834
15835static SDValue performADDCombine(SDNode *N,
15836 TargetLowering::DAGCombinerInfo &DCI,
15837 const RISCVSubtarget &Subtarget) {
15838 SelectionDAG &DAG = DCI.DAG;
15839 if (SDValue V = combineAddOfBooleanXor(N, DAG))
15840 return V;
15841 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
15842 return V;
15843 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
15844 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
15845 return V;
15846 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
15847 return V;
15848 }
15849 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
15850 return V;
15851 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
15852 return V;
15853 if (SDValue V = combineBinOpOfZExt(N, DAG))
15854 return V;
15855
15856 // fold (add (select lhs, rhs, cc, 0, y), x) ->
15857 // (select lhs, rhs, cc, x, (add x, y))
15858 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
15859}
15860
15861// Try to turn a sub boolean RHS and constant LHS into an addi.
15862static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
15863 SDValue N0 = N->getOperand(0);
15864 SDValue N1 = N->getOperand(1);
15865 EVT VT = N->getValueType(0);
15866 SDLoc DL(N);
15867
15868 // Require a constant LHS.
15869 auto *N0C = dyn_cast<ConstantSDNode>(N0);
15870 if (!N0C)
15871 return SDValue();
15872
15873 // All our optimizations involve subtracting 1 from the immediate and forming
15874 // an ADDI. Make sure the new immediate is valid for an ADDI.
15875 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
15876 if (!ImmValMinus1.isSignedIntN(12))
15877 return SDValue();
15878
15879 SDValue NewLHS;
15880 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
15881 // (sub constant, (setcc x, y, eq/neq)) ->
15882 // (add (setcc x, y, neq/eq), constant - 1)
15883 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15884 EVT SetCCOpVT = N1.getOperand(0).getValueType();
15885 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
15886 return SDValue();
15887 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
15888 NewLHS =
15889 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
15890 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
15891 N1.getOperand(0).getOpcode() == ISD::SETCC) {
15892 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
15893 // Since setcc returns a bool the xor is equivalent to 1-setcc.
15894 NewLHS = N1.getOperand(0);
15895 } else
15896 return SDValue();
15897
15898 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
15899 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
15900}
15901
15902// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
15903// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
15904// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
15905// valid with Y=3, while 0b0000_1000_0000_0100 is not.
15906static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
15907 const RISCVSubtarget &Subtarget) {
15908 if (!Subtarget.hasStdExtZbb())
15909 return SDValue();
15910
15911 EVT VT = N->getValueType(0);
15912
15913 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
15914 return SDValue();
15915
15916 SDValue N0 = N->getOperand(0);
15917 SDValue N1 = N->getOperand(1);
15918
15919 if (N0->getOpcode() != ISD::SHL)
15920 return SDValue();
15921
15922 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
15923 if (!ShAmtCLeft)
15924 return SDValue();
15925 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
15926
15927 if (ShiftedAmount >= 8)
15928 return SDValue();
15929
15930 SDValue LeftShiftOperand = N0->getOperand(0);
15931 SDValue RightShiftOperand = N1;
15932
15933 if (ShiftedAmount != 0) { // Right operand must be a right shift.
15934 if (N1->getOpcode() != ISD::SRL)
15935 return SDValue();
15936 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
15937 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
15938 return SDValue();
15939 RightShiftOperand = N1.getOperand(0);
15940 }
15941
15942 // At least one shift should have a single use.
15943 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
15944 return SDValue();
15945
15946 if (LeftShiftOperand != RightShiftOperand)
15947 return SDValue();
15948
15949 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
15950 Mask <<= ShiftedAmount;
15951 // Check that X has indeed the right shape (only the Y-th bit can be set in
15952 // every byte).
15953 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
15954 return SDValue();
15955
15956 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
15957}
15958
15959static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
15960 const RISCVSubtarget &Subtarget) {
15961 if (SDValue V = combineSubOfBoolean(N, DAG))
15962 return V;
15963
15964 EVT VT = N->getValueType(0);
15965 SDValue N0 = N->getOperand(0);
15966 SDValue N1 = N->getOperand(1);
15967 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
15968 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
15969 isNullConstant(N1.getOperand(1)) &&
15970 N1.getValueType() == N1.getOperand(0).getValueType()) {
15971 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
15972 if (CCVal == ISD::SETLT) {
15973 SDLoc DL(N);
15974 unsigned ShAmt = N0.getValueSizeInBits() - 1;
15975 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
15976 DAG.getConstant(ShAmt, DL, VT));
15977 }
15978 }
15979
15980 if (SDValue V = combineBinOpOfZExt(N, DAG))
15981 return V;
15982 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
15983 return V;
15984
15985 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
15986 // (select lhs, rhs, cc, x, (sub x, y))
15987 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
15988}
15989
15990// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
15991// Legalizing setcc can introduce xors like this. Doing this transform reduces
15992// the number of xors and may allow the xor to fold into a branch condition.
15993static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
15994 SDValue N0 = N->getOperand(0);
15995 SDValue N1 = N->getOperand(1);
15996 bool IsAnd = N->getOpcode() == ISD::AND;
15997
15998 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
15999 return SDValue();
16000
16001 if (!N0.hasOneUse() || !N1.hasOneUse())
16002 return SDValue();
16003
16004 SDValue N01 = N0.getOperand(1);
16005 SDValue N11 = N1.getOperand(1);
16006
16007 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
16008 // (xor X, -1) based on the upper bits of the other operand being 0. If the
16009 // operation is And, allow one of the Xors to use -1.
16010 if (isOneConstant(N01)) {
16011 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
16012 return SDValue();
16013 } else if (isOneConstant(N11)) {
16014 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
16015 if (!(IsAnd && isAllOnesConstant(N01)))
16016 return SDValue();
16017 } else
16018 return SDValue();
16019
16020 EVT VT = N->getValueType(0);
16021
16022 SDValue N00 = N0.getOperand(0);
16023 SDValue N10 = N1.getOperand(0);
16024
16025 // The LHS of the xors needs to be 0/1.
16026 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
16027 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
16028 return SDValue();
16029
16030 // Invert the opcode and insert a new xor.
16031 SDLoc DL(N);
16032 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16033 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
16034 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
16035}
16036
16037// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
16038// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
16039// value to an unsigned value. This will be lowered to vmax and a series of
16040// vnclipu instructions later. This can be extended to truncated types other
16041// than i8 by replacing 256 and 255 with the equivalent constants for the
16042// type.
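// (E.g. a vXi16 result would use 65536 and 65535 in place of 256 and 255.)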
16043static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
16044 EVT VT = N->getValueType(0);
16045 SDValue N0 = N->getOperand(0);
16046 EVT SrcVT = N0.getValueType();
16047
16048 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16049 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
16050 return SDValue();
16051
16052 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
16053 return SDValue();
16054
16055 SDValue Cond = N0.getOperand(0);
16056 SDValue True = N0.getOperand(1);
16057 SDValue False = N0.getOperand(2);
16058
16059 if (Cond.getOpcode() != ISD::SETCC)
16060 return SDValue();
16061
16062 // FIXME: Support the version of this pattern with the select operands
16063 // swapped.
16064 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16065 if (CCVal != ISD::SETULT)
16066 return SDValue();
16067
16068 SDValue CondLHS = Cond.getOperand(0);
16069 SDValue CondRHS = Cond.getOperand(1);
16070
16071 if (CondLHS != True)
16072 return SDValue();
16073
16074 unsigned ScalarBits = VT.getScalarSizeInBits();
16075
16076 // FIXME: Support other constants.
16077 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
16078 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
16079 return SDValue();
16080
16081 if (False.getOpcode() != ISD::SIGN_EXTEND)
16082 return SDValue();
16083
16084 False = False.getOperand(0);
16085
16086 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
16087 return SDValue();
16088
16089 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
16090 if (!FalseRHSC || !FalseRHSC->isZero())
16091 return SDValue();
16092
16093 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
16094 if (CCVal2 != ISD::SETGT)
16095 return SDValue();
16096
16097 // Emit the signed to unsigned saturation pattern.
16098 SDLoc DL(N);
16099 SDValue Max =
16100 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
16101 SDValue Min =
16102 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
16103 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
16104 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
16105}
16106
16107static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
16108 const RISCVSubtarget &Subtarget) {
16109 SDValue N0 = N->getOperand(0);
16110 EVT VT = N->getValueType(0);
16111
16112 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
16113 // extending X. This is safe since we only need the LSB after the shift and
16114 // shift amounts larger than 31 would produce poison. If we wait until
16115 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16116 // to use a BEXT instruction.
16117 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
16118 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
16119 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16120 SDLoc DL(N0);
16121 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16122 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16123 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16124 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
16125 }
16126
16127 return combineTruncSelectToSMaxUSat(N, DAG);
16128}
16129
16130// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
16131// truncation. But RVV doesn't have truncation instructions for more than twice
16132// the bitwidth.
16133//
16134// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
16135//
16136// vsetvli a0, zero, e32, m2, ta, ma
16137// vnsrl.wi v12, v8, 0
16138// vsetvli zero, zero, e16, m1, ta, ma
16139// vnsrl.wi v8, v12, 0
16140// vsetvli zero, zero, e8, mf2, ta, ma
16141// vnsrl.wi v8, v8, 0
16142//
16143// So reverse the combine so we generate a vmseq/vmsne again:
16144//
16145// and (lshr (trunc X), ShAmt), 1
16146// -->
16147// zext (icmp ne (and X, (1 << ShAmt)), 0)
16148//
16149// and (lshr (not (trunc X)), ShAmt), 1
16150// -->
16151// zext (icmp eq (and X, (1 << ShAmt)), 0)
16152static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
16153 const RISCVSubtarget &Subtarget) {
16154 using namespace SDPatternMatch;
16155 SDLoc DL(N);
16156
16157 if (!Subtarget.hasVInstructions())
16158 return SDValue();
16159
16160 EVT VT = N->getValueType(0);
16161 if (!VT.isVector())
16162 return SDValue();
16163
16164 APInt ShAmt;
16165 SDValue Inner;
16166 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
16167 m_One())))
16168 return SDValue();
16169
16170 SDValue X;
16171 bool IsNot;
16172 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
16173 IsNot = true;
16174 else if (sd_match(Inner, m_Trunc(m_Value(X))))
16175 IsNot = false;
16176 else
16177 return SDValue();
16178
16179 EVT WideVT = X.getValueType();
16180 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
16181 return SDValue();
16182
16183 SDValue Res =
16184 DAG.getNode(ISD::AND, DL, WideVT, X,
16185 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
16186 Res = DAG.getSetCC(DL,
16187 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16188 WideVT.getVectorElementCount()),
16189 Res, DAG.getConstant(0, DL, WideVT),
16190 IsNot ? ISD::SETEQ : ISD::SETNE);
16191 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
16192}
16193
16194// (and (i1) f, (setcc c, 0, ne)) -> (czero.nez f, c)
16195// (and (i1) f, (setcc c, 0, eq)) -> (czero.eqz f, c)
16196// (and (setcc c, 0, ne), (i1) g) -> (czero.nez g, c)
16197// (and (setcc c, 0, eq), (i1) g) -> (czero.eqz g, c)
16198static SDValue combineANDOfSETCCToCZERO(SDNode *N, SelectionDAG &DAG,
16199 const RISCVSubtarget &Subtarget) {
16200 if (!Subtarget.hasCZEROLike())
16201 return SDValue();
16202
16203 SDValue N0 = N->getOperand(0);
16204 SDValue N1 = N->getOperand(1);
16205
16206 auto IsEqualCompZero = [](SDValue &V) -> bool {
16207 if (V.getOpcode() == ISD::SETCC && isNullConstant(V.getOperand(1))) {
16208 ISD::CondCode CC = cast<CondCodeSDNode>(V.getOperand(2))->get();
16209 if (isIntEqualitySetCC(CC))
16210 return true;
16211 }
16212 return false;
16213 };
16214
16215 if (!IsEqualCompZero(N0) || !N0.hasOneUse())
16216 std::swap(N0, N1);
16217 if (!IsEqualCompZero(N0) || !N0.hasOneUse())
16218 return SDValue();
16219
16220 KnownBits Known = DAG.computeKnownBits(N1);
16221 if (Known.getMaxValue().ugt(1))
16222 return SDValue();
16223
16224 unsigned CzeroOpcode =
16225 (cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETNE)
16226 ? RISCVISD::CZERO_EQZ
16227 : RISCVISD::CZERO_NEZ;
16228
16229 EVT VT = N->getValueType(0);
16230 SDLoc DL(N);
16231 return DAG.getNode(CzeroOpcode, DL, VT, N1, N0.getOperand(0));
16232}
16233
16234static SDValue reduceANDOfAtomicLoad(SDNode *N,
16235 TargetLowering::DAGCombinerInfo &DCI) {
16236 SelectionDAG &DAG = DCI.DAG;
16237 if (N->getOpcode() != ISD::AND)
16238 return SDValue();
16239
16240 SDValue N0 = N->getOperand(0);
16241 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
16242 return SDValue();
16243 if (!N0.hasOneUse())
16244 return SDValue();
16245
16246 AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
16247 if (isStrongerThanMonotonic(ALoad->getSuccessOrdering()))
16248 return SDValue();
16249
16250 EVT LoadedVT = ALoad->getMemoryVT();
16251 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
16252 if (!MaskConst)
16253 return SDValue();
16254 uint64_t Mask = MaskConst->getZExtValue();
16255 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
16256 if (Mask != ExpectedMask)
16257 return SDValue();
16258
16259 SDValue ZextLoad = DAG.getAtomicLoad(
16260 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
16261 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
16262 DCI.CombineTo(N, ZextLoad);
16263 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
16264 DCI.recursivelyDeleteUnusedNodes(N0.getNode());
16265 return SDValue(N, 0);
16266}
16267
16268// Combines two comparison operations and a logic operation into one selection
16269// operation (min, max) plus a logic operation. Returns the newly constructed
16270// node if the conditions for the optimization are satisfied.
16271static SDValue performANDCombine(SDNode *N,
16272 TargetLowering::DAGCombinerInfo &DCI,
16273 const RISCVSubtarget &Subtarget) {
16274 SelectionDAG &DAG = DCI.DAG;
16275
16276 SDValue N0 = N->getOperand(0);
16277 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
16278 // extending X. This is safe since we only need the LSB after the shift and
16279 // shift amounts larger than 31 would produce poison. If we wait until
16280 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16281 // to use a BEXT instruction.
16282 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16283 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
16284 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
16285 N0.hasOneUse()) {
16286 SDLoc DL(N);
16287 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16288 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16289 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16290 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
16291 DAG.getConstant(1, DL, MVT::i64));
16292 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16293 }
16294
16295 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
16296 return V;
16297 if (DCI.isAfterLegalizeDAG())
16298 if (SDValue V = combineANDOfSETCCToCZERO(N, DAG, Subtarget))
16299 return V;
16300 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16301 return V;
16302 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16303 return V;
16304 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
16305 return V;
16306
16307 if (DCI.isAfterLegalizeDAG())
16308 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16309 return V;
16310
16311 // fold (and (select lhs, rhs, cc, -1, y), x) ->
16312 // (select lhs, rhs, cc, x, (and x, y))
16313 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
16314}
16315
16316// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
16317// FIXME: Generalize to other binary operators with same operand.
16318static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
16319 SelectionDAG &DAG) {
16320 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
16321
16322 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
16323 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
16324 !N0.hasOneUse() || !N1.hasOneUse())
16325 return SDValue();
16326
16327 // Should have the same condition.
16328 SDValue Cond = N0.getOperand(1);
16329 if (Cond != N1.getOperand(1))
16330 return SDValue();
16331
16332 SDValue TrueV = N0.getOperand(0);
16333 SDValue FalseV = N1.getOperand(0);
16334
16335 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
16336 TrueV.getOperand(1) != FalseV.getOperand(1) ||
16337 !isOneConstant(TrueV.getOperand(1)) ||
16338 !TrueV.hasOneUse() || !FalseV.hasOneUse())
16339 return SDValue();
16340
16341 EVT VT = N->getValueType(0);
16342 SDLoc DL(N);
16343
16344 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
16345 Cond);
16346 SDValue NewN1 =
16347 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
16348 SDValue NewOr =
16349 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
16350 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
16351}
16352
16353// (xor X, (xor (and X, C2), Y))
16354// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
16355// where C2 is a shifted mask with width = Width and shift = ShAmt
16356// qc_insb might become qc.insb or qc.insbi depending on the operands.
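// For example (illustrative values): with C2 = 0xff0 (Width = 8, ShAmt = 4) and Y
// having no bits set outside C2, the node becomes (qc_insb X, (sra Y, 4), 8, 4).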
16357static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16358 const RISCVSubtarget &Subtarget) {
16359 if (!Subtarget.hasVendorXqcibm())
16360 return SDValue();
16361
16362 using namespace SDPatternMatch;
16363 SDValue Base, Inserted;
16364 APInt CMask;
16365 if (!sd_match(N, m_Xor(m_Value(Base),
16366 m_OneUse(m_Xor(m_OneUse(m_And(m_Deferred(Base),
16367 m_ConstInt(CMask))),
16368 m_Value(Inserted))))))
16369 return SDValue();
16370
16371 if (N->getValueType(0) != MVT::i32)
16372 return SDValue();
16373 unsigned Width, ShAmt;
16374 if (!CMask.isShiftedMask(ShAmt, Width))
16375 return SDValue();
16376
16377 // Check if all zero bits in CMask are also zero in Inserted
16378 if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
16379 return SDValue();
16380
16381 SDLoc DL(N);
16382
16383 // `Inserted` needs to be right shifted before it is put into the
16384 // instruction.
16385 Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
16386 DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
16387
16388 SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
16389 DAG.getConstant(ShAmt, DL, MVT::i32)};
16390 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16391}
16392
16393static SDValue combineOrToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16394 const RISCVSubtarget &Subtarget) {
16395 if (!Subtarget.hasVendorXqcibm())
16396 return SDValue();
16397
16398 using namespace SDPatternMatch;
16399
16400 SDValue X;
16401 APInt MaskImm;
16402 if (!sd_match(N, m_Or(m_OneUse(m_Value(X)), m_ConstInt(MaskImm))))
16403 return SDValue();
16404
16405 unsigned ShAmt, Width;
16406 if (!MaskImm.isShiftedMask(ShAmt, Width) || MaskImm.isSignedIntN(12))
16407 return SDValue();
16408
16409 if (N->getValueType(0) != MVT::i32)
16410 return SDValue();
16411
16412 // If Zbs is enabled and only a single bit is set, we can use BSETI, which
16413 // can be compressed to C_BSETI when Xqcibm is enabled.
16414 if (Width == 1 && Subtarget.hasStdExtZbs())
16415 return SDValue();
16416
16417 // If C1 is a shifted mask (but can't be formed as an ORI),
16418 // use a bitfield insert of -1.
16419 // Transform (or x, C1)
16420 // -> (qc.insbi x, -1, width, shift)
16421 SDLoc DL(N);
16422
16423 SDValue Ops[] = {X, DAG.getSignedConstant(-1, DL, MVT::i32),
16424 DAG.getConstant(Width, DL, MVT::i32),
16425 DAG.getConstant(ShAmt, DL, MVT::i32)};
16426 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16427}
16428
16429// Generate a QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value
16430// being inserted only sets known zero bits.
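// For example (illustrative values, assuming nothing else is known about X):
// (or (and X, 0xffff00ff), 0x4200) has known-zero bits [15:8], so it becomes
// (qc_insb X, 0x42, 8, 8).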
16431static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16432 const RISCVSubtarget &Subtarget) {
16433 // Supported only in Xqcibm for now.
16434 if (!Subtarget.hasVendorXqcibm())
16435 return SDValue();
16436
16437 using namespace SDPatternMatch;
16438
16439 SDValue Inserted;
16440 APInt MaskImm, OrImm;
16441 if (!sd_match(
16442 N, m_SpecificVT(MVT::i32, m_Or(m_OneUse(m_And(m_Value(Inserted),
16443 m_ConstInt(MaskImm))),
16444 m_ConstInt(OrImm)))))
16445 return SDValue();
16446
16447 // Compute the Known Zero for the AND as this allows us to catch more general
16448 // cases than just looking for AND with imm.
16449 KnownBits Known = DAG.computeKnownBits(N->getOperand(0));
16450
16451 // The bits being inserted must only set those bits that are known to be
16452 // zero.
16453 if (!OrImm.isSubsetOf(Known.Zero)) {
16454 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
16455 // currently handle this case.
16456 return SDValue();
16457 }
16458
16459 unsigned ShAmt, Width;
16460 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
16461 if (!Known.Zero.isShiftedMask(ShAmt, Width))
16462 return SDValue();
16463
16464 // QC_INSB(I) dst, src, #width, #shamt.
16465 SDLoc DL(N);
16466
16467 SDValue ImmNode =
16468 DAG.getSignedConstant(OrImm.getSExtValue() >> ShAmt, DL, MVT::i32);
16469
16470 SDValue Ops[] = {Inserted, ImmNode, DAG.getConstant(Width, DL, MVT::i32),
16471 DAG.getConstant(ShAmt, DL, MVT::i32)};
16472 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16473}
16474
16475static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16476 const RISCVSubtarget &Subtarget) {
16477 SelectionDAG &DAG = DCI.DAG;
16478
16479 if (SDValue V = combineOrToBitfieldInsert(N, DAG, Subtarget))
16480 return V;
16481 if (SDValue V = combineOrAndToBitfieldInsert(N, DAG, Subtarget))
16482 return V;
16483 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16484 return V;
16485 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16486 return V;
16487
16488 if (DCI.isAfterLegalizeDAG())
16489 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16490 return V;
16491
16492 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
16493 // We may be able to pull a common operation out of the true and false value.
16494 SDValue N0 = N->getOperand(0);
16495 SDValue N1 = N->getOperand(1);
16496 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
16497 return V;
16498 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
16499 return V;
16500
16501 // fold (or (select cond, 0, y), x) ->
16502 // (select cond, x, (or x, y))
16503 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16504}
16505
16506static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
16507 const RISCVSubtarget &Subtarget) {
16508 SDValue N0 = N->getOperand(0);
16509 SDValue N1 = N->getOperand(1);
16510
16511 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
16512 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
16513 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
16514 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16515 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
16516 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
16517 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16518 SDLoc DL(N);
16519 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16520 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16521 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
16522 SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
16523 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
16524 }
16525
16526 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
16527 // NOTE: Assumes ROL being legal means ROLW is legal.
16528 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16529 if (N0.getOpcode() == RISCVISD::SLLW &&
16530 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
16531 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
16532 SDLoc DL(N);
16533 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
16534 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
16535 }
16536
16537 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
16538 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
16539 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
16540 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16541 if (ConstN00 && CC == ISD::SETLT) {
16542 EVT VT = N0.getValueType();
16543 SDLoc DL(N0);
16544 const APInt &Imm = ConstN00->getAPIntValue();
16545 if ((Imm + 1).isSignedIntN(12))
16546 return DAG.getSetCC(DL, VT, N0.getOperand(1),
16547 DAG.getConstant(Imm + 1, DL, VT), CC);
16548 }
16549 }
16550
16551 if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
16552 return V;
16553
16554 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16555 return V;
16556 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16557 return V;
16558
16559 // fold (xor (select cond, 0, y), x) ->
16560 // (select cond, x, (xor x, y))
16561 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16562}
16563
16564// Try to expand a multiply to a sequence of shifts and add/subs,
16565// for a machine without a native mul instruction.
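// For example (illustrative value): MulAmt = 7 has non-adjacent form 8 - 1, so
// the loop below builds (add (sub 0, X), (shl X, 3)).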
16566static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
16567 uint64_t MulAmt) {
16568 SDLoc DL(N);
16569 EVT VT = N->getValueType(0);
16570 const uint64_t BitWidth = VT.getFixedSizeInBits();
16571
16572 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
16573 SDValue N0 = N->getOperand(0);
16574
16575 // Find the Non-adjacent form of the multiplier.
16576 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
16577 if (E & 1) {
16578 bool IsAdd = (E & 3) == 1;
16579 E -= IsAdd ? 1 : -1;
16580 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
16581 DAG.getShiftAmountConstant(I, VT, DL));
16582 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
16583 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
16584 }
16585 }
16586
16587 return Result;
16588}
16589
16590// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
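// For example, X * 20 (16 + 4) becomes (add (shl X, 4), (shl X, 2)), and
// X * 14 (16 - 2) becomes (sub (shl X, 4), (shl X, 1)).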
16591static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
16592 uint64_t MulAmt) {
16593 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16594 ISD::NodeType Op;
16595 uint64_t ShiftAmt1;
16596 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
16597 Op = ISD::SUB;
16598 ShiftAmt1 = MulAmt + MulAmtLowBit;
16599 } else if (isPowerOf2_64(MulAmt - MulAmtLowBit)) {
16600 Op = ISD::ADD;
16601 ShiftAmt1 = MulAmt - MulAmtLowBit;
16602 } else {
16603 return SDValue();
16604 }
16605 EVT VT = N->getValueType(0);
16606 SDLoc DL(N);
16607 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16608 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16609 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16610 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
16611 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
16612}
16613
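// Build one or two SHL_ADD (shXadd) nodes multiplying X by a constant of the
// form 3/5/9 (ShX == 0), 3/5/9 * 3/5/9 (AddX == false) or 2/4/8 * 3/5/9 + 1
// (AddX == true), optionally followed by a left shift by Shift. For example,
// ShY = 2 and ShX = 3 give (sh3add (sh2add X, X), (sh2add X, X)) == X * 45,
// or (sh3add (sh2add X, X), X) == X * 41 when AddX is set.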
16614static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
16615 unsigned ShY, bool AddX, unsigned Shift) {
16616 SDLoc DL(N);
16617 EVT VT = N->getValueType(0);
16618 SDValue X = N->getOperand(0);
16619 // Put the shift first if we can fold a zext into the shift forming a slli.uw.
16620 using namespace SDPatternMatch;
16621 if (Shift != 0 &&
16622 sd_match(X, m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))))) {
16623 X = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
16624 Shift = 0;
16625 }
16626 SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16627 DAG.getTargetConstant(ShY, DL, VT), X);
16628 if (ShX != 0)
16629 ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, ShlAdd,
16630 DAG.getTargetConstant(ShX, DL, VT), AddX ? X : ShlAdd);
16631 if (Shift == 0)
16632 return ShlAdd;
16633 // Otherwise, put the shl last so that it can fold with following instructions
16634 // (e.g. sext or add).
16635 return DAG.getNode(ISD::SHL, DL, VT, ShlAdd, DAG.getConstant(Shift, DL, VT));
16636}
16637
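// Try to expand X * MulAmt into at most two shXadd nodes plus an optional
// shift, where MulAmt is odd and any power-of-2 factor has already been
// stripped into Shift. For example, X * 11 = 2 * 5 + 1 becomes
// (sh1add (sh2add X, X), X).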
16638static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
16639 uint64_t MulAmt, unsigned Shift) {
16640 switch (MulAmt) {
16641 // 3/5/9 -> (shYadd X, X)
16642 case 3:
16643 return getShlAddShlAdd(N, DAG, 0, 1, /*AddX=*/false, Shift);
16644 case 5:
16645 return getShlAddShlAdd(N, DAG, 0, 2, /*AddX=*/false, Shift);
16646 case 9:
16647 return getShlAddShlAdd(N, DAG, 0, 3, /*AddX=*/false, Shift);
16648 // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
16649 case 5 * 3:
16650 return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false, Shift);
16651 case 9 * 3:
16652 return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false, Shift);
16653 case 5 * 5:
16654 return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false, Shift);
16655 case 9 * 5:
16656 return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false, Shift);
16657 case 9 * 9:
16658 return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false, Shift);
16659 default:
16660 break;
16661 }
16662
16663 // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
16664 int ShX;
16665 if (int ShY = isShifted359(MulAmt - 1, ShX)) {
16666 assert(ShX != 0 && "MulAmt=4,6,10 handled before");
16667 if (ShX <= 3)
16668 return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true, Shift);
16669 }
16670 return SDValue();
16671}
16672
16673// Try to expand a scalar multiply to a faster sequence.
16674static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
16675 TargetLowering::DAGCombinerInfo &DCI,
16676 const RISCVSubtarget &Subtarget) {
16677
16678 EVT VT = N->getValueType(0);
16679
16680 // LI + MUL is usually smaller than the alternative sequence.
16681 if (DAG.getMachineFunction().getFunction().hasMinSize())
16682 return SDValue();
16683
16684 if (VT != Subtarget.getXLenVT())
16685 return SDValue();
16686
16687 bool ShouldExpandMul =
16688 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
16689 !Subtarget.hasStdExtZmmul();
16690 if (!ShouldExpandMul)
16691 return SDValue();
16692
16693 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
16694 if (!CNode)
16695 return SDValue();
16696 uint64_t MulAmt = CNode->getZExtValue();
16697
16698 // Don't do this if the Xqciac extension is enabled and MulAmt is a simm12.
16699 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
16700 return SDValue();
16701
16702 // WARNING: The code below is knowingly incorrect with regard to undef
16703 // semantics. We're adding additional uses of X here, and in principle, we
16704 // should be freezing X before doing so. However, adding freeze here causes
16705 // real regressions, and no other target properly freezes X in these cases
16706 // either.
16707 if (Subtarget.hasShlAdd(3)) {
16708 // 3/5/9 * 2^N -> (shl (shXadd X, X), N)
16709 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
16710 // of 25 which happen to be quite common.
16711 // (2/4/8 * 3/5/9 + 1) * 2^N
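// For example, X * 100 = 25 * 4 becomes
// (shl (sh2add (sh2add X, X), (sh2add X, X)), 2).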
16712 unsigned Shift = llvm::countr_zero(MulAmt);
16713 if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift, Shift))
16714 return V;
16715
16716 // If this is a power of 2 plus 2/4/8, we can use a shift followed by a
16717 // single shXadd. First check if this is a sum of two powers of 2 since
16718 // that's easy, then locate the other set bit by counting trailing zeros.
16719 SDValue X = N->getOperand(0);
16720 if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
16721 unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
16722 SDLoc DL(N);
16723 SDValue Shift1 =
16724 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16725 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16726 DAG.getTargetConstant(Shift, DL, VT), Shift1);
16727 }
16728
16729 // TODO: 2^(C1>3) * 3,5,9 +/- 1
16730
16731 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
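// For example, X * 19 = 16 + 2 + 1 becomes (add (shl X, 4), (sh1add X, X)).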
16732 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
16733 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
16734 if (ScaleShift >= 1 && ScaleShift < 4) {
16735 unsigned ShiftAmt = llvm::countr_zero((MulAmt - 1) & (MulAmt - 2));
16736 SDLoc DL(N);
16737 SDValue Shift1 =
16738 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
16739 return DAG.getNode(
16740 ISD::ADD, DL, VT, Shift1,
16741 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16742 DAG.getTargetConstant(ScaleShift, DL, VT), X));
16743 }
16744 }
16745
16746 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
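// For example, X * 61 = 64 - 3 becomes (sub (shl X, 6), (sh1add X, X)).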
16747 for (uint64_t Offset : {3, 5, 9}) {
16748 if (isPowerOf2_64(MulAmt + Offset)) {
16749 unsigned ShAmt = llvm::countr_zero(MulAmt + Offset);
16750 if (ShAmt >= VT.getSizeInBits())
16751 continue;
16752 SDLoc DL(N);
16753 SDValue Shift1 =
16754 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
16755 SDValue Mul359 =
16756 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16757 DAG.getTargetConstant(Log2_64(Offset - 1), DL, VT), X);
16758 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
16759 }
16760 }
16761 }
16762
16763 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
16764 return V;
16765
16766 if (!Subtarget.hasStdExtZmmul())
16767 return expandMulToNAFSequence(N, DAG, MulAmt);
16768
16769 return SDValue();
16770}
16771
16772// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
16773// (bitcast (sra (v2Xi16 (bitcast X)), 15))
16774// Same for other equivalent types with other equivalent constants.
16775static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
16776 EVT VT = N->getValueType(0);
16777 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16778
16779 // Do this for legal vectors unless they are i1 or i8 vectors.
16780 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
16781 return SDValue();
16782
16783 if (N->getOperand(0).getOpcode() != ISD::AND ||
16784 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
16785 return SDValue();
16786
16787 SDValue And = N->getOperand(0);
16788 SDValue Srl = And.getOperand(0);
16789
16790 APInt V1, V2, V3;
16791 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
16792 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
16793 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
16794 return SDValue();
16795
16796 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
16797 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
16798 V3 != (HalfSize - 1))
16799 return SDValue();
16800
16801 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
16802 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
16803 VT.getVectorElementCount() * 2);
16804 SDLoc DL(N);
16805 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
16806 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
16807 DAG.getConstant(HalfSize - 1, DL, HalfVT));
16808 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
16809}
16810
16811static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
16812 TargetLowering::DAGCombinerInfo &DCI,
16813 const RISCVSubtarget &Subtarget) {
16814 EVT VT = N->getValueType(0);
16815 if (!VT.isVector())
16816 return expandMul(N, DAG, DCI, Subtarget);
16817
16818 SDLoc DL(N);
16819 SDValue N0 = N->getOperand(0);
16820 SDValue N1 = N->getOperand(1);
16821 SDValue MulOper;
16822 unsigned AddSubOpc;
16823
16824 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
16825 // (mul x, add (y, 1)) -> (add x, (mul x, y))
16826 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
16827 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
16828 auto IsAddSubWith1 = [&](SDValue V) -> bool {
16829 AddSubOpc = V->getOpcode();
16830 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
16831 SDValue Opnd = V->getOperand(1);
16832 MulOper = V->getOperand(0);
16833 if (AddSubOpc == ISD::SUB)
16834 std::swap(Opnd, MulOper);
16835 if (isOneOrOneSplat(Opnd))
16836 return true;
16837 }
16838 return false;
16839 };
16840
16841 if (IsAddSubWith1(N0)) {
16842 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
16843 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
16844 }
16845
16846 if (IsAddSubWith1(N1)) {
16847 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
16848 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
16849 }
16850
16851 if (SDValue V = combineBinOpOfZExt(N, DAG))
16852 return V;
16853
16854 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
16855 return V;
16856
16857 return SDValue();
16858}
16859
16860/// According to the property that indexed load/store instructions zero-extend
16861/// their indices, try to narrow the type of index operand.
16862static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
16863 if (isIndexTypeSigned(IndexType))
16864 return false;
16865
16866 if (!N->hasOneUse())
16867 return false;
16868
16869 EVT VT = N.getValueType();
16870 SDLoc DL(N);
16871
16872 // In general, what we're doing here is seeing if we can sink a truncate to
16873 // a smaller element type into the expression tree building our index.
16874 // TODO: We can generalize this and handle a bunch more cases if useful.
16875
16876 // Narrow a buildvector to the narrowest element type. This requires less
16877 // work and less register pressure at high LMUL, and creates smaller constants
16878 // which may be cheaper to materialize.
16879 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
16880 KnownBits Known = DAG.computeKnownBits(N);
16881 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
16882 LLVMContext &C = *DAG.getContext();
16883 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
16884 if (ResultVT.bitsLT(VT.getVectorElementType())) {
16885 N = DAG.getNode(ISD::TRUNCATE, DL,
16886 VT.changeVectorElementType(ResultVT), N);
16887 return true;
16888 }
16889 }
16890
16891 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
16892 if (N.getOpcode() != ISD::SHL)
16893 return false;
16894
16895 SDValue N0 = N.getOperand(0);
16896 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
16897 N0.getOpcode() != RISCVISD::VZEXT_VL)
16898 return false;
16899 if (!N0->hasOneUse())
16900 return false;
16901
16902 APInt ShAmt;
16903 SDValue N1 = N.getOperand(1);
16904 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
16905 return false;
16906
16907 SDValue Src = N0.getOperand(0);
16908 EVT SrcVT = Src.getValueType();
16909 unsigned SrcElen = SrcVT.getScalarSizeInBits();
16910 unsigned ShAmtV = ShAmt.getZExtValue();
16911 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
16912 NewElen = std::max(NewElen, 8U);
16913
16914 // Skip if NewElen is not narrower than the original extended type.
16915 if (NewElen >= N0.getValueType().getScalarSizeInBits())
16916 return false;
16917
16918 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
16919 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
16920
16921 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
16922 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
16923 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
16924 return true;
16925}
16926
16927/// Try to map an integer comparison with size > XLEN to vector instructions
16928/// before type legalization splits it up into chunks.
16929static SDValue
16930combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
16931 const SDLoc &DL, SelectionDAG &DAG,
16932 const RISCVSubtarget &Subtarget) {
16933 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
16934
16935 if (!Subtarget.hasVInstructions())
16936 return SDValue();
16937
16938 MVT XLenVT = Subtarget.getXLenVT();
16939 EVT OpVT = X.getValueType();
16940 // We're looking for an oversized integer equality comparison.
16941 if (!OpVT.isScalarInteger())
16942 return SDValue();
16943
16944 unsigned OpSize = OpVT.getSizeInBits();
16945 // The size should be larger than XLen and smaller than the maximum vector
16946 // size.
16947 if (OpSize <= Subtarget.getXLen() ||
16948 OpSize > Subtarget.getRealMinVLen() *
16949 Subtarget.getMaxLMULForFixedLengthVectors())
16950 return SDValue();
16951
16952 // Don't perform this combine if constructing the vector will be expensive.
16953 auto IsVectorBitCastCheap = [](SDValue X) {
16954 X = peekThroughBitcasts(X);
16955 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
16956 X.getOpcode() == ISD::LOAD;
16957 };
16958 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
16959 return SDValue();
16960
16961 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
16962 Attribute::NoImplicitFloat))
16963 return SDValue();
16964
16965 // Bail out for non-byte-sized types.
16966 if (!OpVT.isByteSized())
16967 return SDValue();
16968
16969 unsigned VecSize = OpSize / 8;
16970 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
16971 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
16972
16973 SDValue VecX = DAG.getBitcast(VecVT, X);
16974 SDValue VecY = DAG.getBitcast(VecVT, Y);
16975 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
16976 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
16977
16978 SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
16979 DAG.getCondCode(ISD::SETNE), Mask, VL);
16980 return DAG.getSetCC(DL, VT,
16981 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
16982 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
16983 VL),
16984 DAG.getConstant(0, DL, XLenVT), CC);
16985}
16986
16987static SDValue performSETCCCombine(SDNode *N,
16988 TargetLowering::DAGCombinerInfo &DCI,
16989 const RISCVSubtarget &Subtarget) {
16990 SelectionDAG &DAG = DCI.DAG;
16991 SDLoc dl(N);
16992 SDValue N0 = N->getOperand(0);
16993 SDValue N1 = N->getOperand(1);
16994 EVT VT = N->getValueType(0);
16995 EVT OpVT = N0.getValueType();
16996
16997 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
16998 // Looking for an equality compare.
16999 if (!isIntEqualitySetCC(Cond))
17000 return SDValue();
17001
17002 if (SDValue V =
17003 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
17004 return V;
17005
17006 if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
17007 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
17008 isa<ConstantSDNode>(N0.getOperand(1))) {
17009 const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
17010 // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
17011 if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
17012 AndRHSC.isNegatedPowerOf2()) {
17013 unsigned ShiftBits = AndRHSC.countr_zero();
17014 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
17015 DAG.getConstant(ShiftBits, dl, OpVT));
17016 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
17017 }
17018
17019 // Similar to above but handling the lower 32 bits by using sraiw. Allow
17020 // comparing with constants other than 0 if the constant can be folded into
17021 // addi or xori after shifting.
17022 uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
17023 uint64_t AndRHSInt = AndRHSC.getZExtValue();
17024 if (OpVT == MVT::i64 && isUInt<32>(AndRHSInt) &&
17025 isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
17026 unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
17027 int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
17028 if (NewC >= -2048 && NewC <= 2048) {
17029 SDValue SExt =
17030 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
17031 DAG.getValueType(MVT::i32));
17032 SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
17033 DAG.getConstant(ShiftBits, dl, OpVT));
17034 return DAG.getSetCC(dl, VT, Shift,
17035 DAG.getSignedConstant(NewC, dl, OpVT), Cond);
17036 }
17037 }
17038 }
17039
17040 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
17041 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
17042 // bit 31. Same for setne. C1' may be cheaper to materialize and the
17043 // sext_inreg can become a sext.w instead of a shift pair.
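// For example, (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000); the new constant is a
// single LUI and the sext_inreg can be selected as sext.w.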
17044 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
17045 return SDValue();
17046
17047 // RHS needs to be a constant.
17048 auto *N1C = dyn_cast<ConstantSDNode>(N1);
17049 if (!N1C)
17050 return SDValue();
17051
17052 // LHS needs to be (and X, 0xffffffff).
17053 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
17054 !isa<ConstantSDNode>(N0.getOperand(1)) ||
17055 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
17056 return SDValue();
17057
17058 // Don't do this if the sign bit is provably zero; it will be turned back into
17059 // an AND.
17060 APInt SignMask = APInt::getOneBitSet(64, 31);
17061 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
17062 return SDValue();
17063
17064 const APInt &C1 = N1C->getAPIntValue();
17065
17066 // If the constant is larger than 2^32 - 1 it is impossible for both sides
17067 // to be equal.
17068 if (C1.getActiveBits() > 32)
17069 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
17070
17071 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
17072 N0.getOperand(0), DAG.getValueType(MVT::i32));
17073 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
17074 dl, OpVT), Cond);
17075}
17076
17077static SDValue
17078performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
17079 const RISCVSubtarget &Subtarget) {
17080 SelectionDAG &DAG = DCI.DAG;
17081 SDValue Src = N->getOperand(0);
17082 EVT VT = N->getValueType(0);
17083 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
17084 unsigned Opc = Src.getOpcode();
17085 SDLoc DL(N);
17086
17087 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
17088 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
17089 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
17090 Subtarget.hasStdExtZfhmin())
17091 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
17092
17093 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
17094 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
17095 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
17096 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
17097 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
17098 Src.getOperand(1));
17099
17100 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
17101 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
17102 return DAG.getNegative(Src, DL, VT);
17103
17104 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
17105 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
17106 isAllOnesConstant(Src.getOperand(1)) &&
17107 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
17108 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
17109 DAG.getAllOnesConstant(DL, VT));
17110
17111 return SDValue();
17112}
17113
17114namespace {
17115// Forward declaration of the structure holding the necessary information to
17116// apply a combine.
17117struct CombineResult;
17118
17119enum ExtKind : uint8_t {
17120 ZExt = 1 << 0,
17121 SExt = 1 << 1,
17122 FPExt = 1 << 2,
17123 BF16Ext = 1 << 3
17124};
17125/// Helper class for folding sign/zero extensions.
17126/// In particular, this class is used for the following combines:
17127/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
17128/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17129/// mul | mul_vl -> vwmul(u) | vwmul_su
17130/// shl | shl_vl -> vwsll
17131/// fadd -> vfwadd | vfwadd_w
17132/// fsub -> vfwsub | vfwsub_w
17133/// fmul -> vfwmul
17134/// An object of this class represents an operand of the operation we want to
17135/// combine.
17136/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
17137/// NodeExtensionHelper for `a` and one for `b`.
17138///
17139/// This class abstracts away how the extension is materialized and
17140/// how its number of users affect the combines.
17141///
17142/// In particular:
17143/// - VWADD_W is conceptually == add(op0, sext(op1))
17144/// - VWADDU_W == add(op0, zext(op1))
17145/// - VWSUB_W == sub(op0, sext(op1))
17146/// - VWSUBU_W == sub(op0, zext(op1))
17147/// - VFWADD_W == fadd(op0, fpext(op1))
17148/// - VFWSUB_W == fsub(op0, fpext(op1))
17149/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
17150/// zext|sext(smaller_value).
17151struct NodeExtensionHelper {
17152 /// Records if this operand is like being zero extended.
17153 bool SupportsZExt;
17154 /// Records if this operand is like being sign extended.
17155 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
17156 /// instance, a splat constant (e.g., 3), would support being both sign and
17157 /// zero extended.
17158 bool SupportsSExt;
17159 /// Records if this operand is like being floating point extended.
17160 bool SupportsFPExt;
17161 /// Records if this operand is extended from bf16.
17162 bool SupportsBF16Ext;
17163 /// This boolean captures whether we care if this operand would still be
17164 /// around after the folding happens.
17165 bool EnforceOneUse;
17166 /// Original value that this NodeExtensionHelper represents.
17167 SDValue OrigOperand;
17168
17169 /// Get the value feeding the extension or the value itself.
17170 /// E.g., for zext(a), this would return a.
17171 SDValue getSource() const {
17172 switch (OrigOperand.getOpcode()) {
17173 case ISD::ZERO_EXTEND:
17174 case ISD::SIGN_EXTEND:
17175 case RISCVISD::VSEXT_VL:
17176 case RISCVISD::VZEXT_VL:
17177 case RISCVISD::FP_EXTEND_VL:
17178 return OrigOperand.getOperand(0);
17179 default:
17180 return OrigOperand;
17181 }
17182 }
17183
17184 /// Check if this instance represents a splat.
17185 bool isSplat() const {
17186 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
17187 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
17188 }
17189
17190 /// Get the extended opcode.
17191 unsigned getExtOpc(ExtKind SupportsExt) const {
17192 switch (SupportsExt) {
17193 case ExtKind::SExt:
17194 return RISCVISD::VSEXT_VL;
17195 case ExtKind::ZExt:
17196 return RISCVISD::VZEXT_VL;
17197 case ExtKind::FPExt:
17198 case ExtKind::BF16Ext:
17199 return RISCVISD::FP_EXTEND_VL;
17200 }
17201 llvm_unreachable("Unknown ExtKind enum");
17202 }
17203
17204 /// Get or create a value that can feed \p Root with the given extension \p
17205 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
17206 /// operand. \see ::getSource().
17207 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
17208 const RISCVSubtarget &Subtarget,
17209 std::optional<ExtKind> SupportsExt) const {
17210 if (!SupportsExt.has_value())
17211 return OrigOperand;
17212
17213 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
17214
17215 SDValue Source = getSource();
17216 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
17217 if (Source.getValueType() == NarrowVT)
17218 return Source;
17219
17220 unsigned ExtOpc = getExtOpc(*SupportsExt);
17221
17222 // If we need an extension, we should be changing the type.
17223 SDLoc DL(OrigOperand);
17224 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
17225 switch (OrigOperand.getOpcode()) {
17226 case ISD::ZERO_EXTEND:
17227 case ISD::SIGN_EXTEND:
17228 case RISCVISD::VSEXT_VL:
17229 case RISCVISD::VZEXT_VL:
17230 case RISCVISD::FP_EXTEND_VL:
17231 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
17232 case ISD::SPLAT_VECTOR:
17233 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
17234 case RISCVISD::VMV_V_X_VL:
17235 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
17236 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
17237 case RISCVISD::VFMV_V_F_VL:
17238 Source = Source.getOperand(1);
17239 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
17240 Source = Source.getOperand(0);
17241 assert(Source.getValueType() == NarrowVT.getVectorElementType());
17242 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
17243 DAG.getUNDEF(NarrowVT), Source, VL);
17244 default:
17245 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
17246 // and that operand should already have the right NarrowVT so no
17247 // extension should be required at this point.
17248 llvm_unreachable("Unsupported opcode");
17249 }
17250 }
17251
17252 /// Helper function to get the narrow type for \p Root.
17253 /// The narrow type is the type of \p Root where we divided the size of each
17254 /// element by 2. E.g., if Root's type is <2 x i16>, the narrow type is <2 x i8>.
17255 /// \pre Both the narrow type and the original type should be legal.
17256 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
17257 MVT VT = Root->getSimpleValueType(0);
17258
17259 // Determine the narrow size.
17260 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17261
17262 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
17263 : SupportsExt == ExtKind::FPExt
17264 ? MVT::getFloatingPointVT(NarrowSize)
17265 : MVT::getIntegerVT(NarrowSize);
17266
17267 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
17268 "Trying to extend something we can't represent");
17269 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
17270 return NarrowVT;
17271 }
17272
17273 /// Get the opcode to materialize:
17274 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
17275 static unsigned getSExtOpcode(unsigned Opcode) {
17276 switch (Opcode) {
17277 case ISD::ADD:
17278 case RISCVISD::ADD_VL:
17279 case RISCVISD::VWADD_W_VL:
17280 case RISCVISD::VWADDU_W_VL:
17281 case ISD::OR:
17282 case RISCVISD::OR_VL:
17283 return RISCVISD::VWADD_VL;
17284 case ISD::SUB:
17285 case RISCVISD::SUB_VL:
17286 case RISCVISD::VWSUB_W_VL:
17287 case RISCVISD::VWSUBU_W_VL:
17288 return RISCVISD::VWSUB_VL;
17289 case ISD::MUL:
17290 case RISCVISD::MUL_VL:
17291 return RISCVISD::VWMUL_VL;
17292 default:
17293 llvm_unreachable("Unexpected opcode");
17294 }
17295 }
17296
17297 /// Get the opcode to materialize:
17298 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
17299 static unsigned getZExtOpcode(unsigned Opcode) {
17300 switch (Opcode) {
17301 case ISD::ADD:
17302 case RISCVISD::ADD_VL:
17303 case RISCVISD::VWADD_W_VL:
17304 case RISCVISD::VWADDU_W_VL:
17305 case ISD::OR:
17306 case RISCVISD::OR_VL:
17307 return RISCVISD::VWADDU_VL;
17308 case ISD::SUB:
17309 case RISCVISD::SUB_VL:
17310 case RISCVISD::VWSUB_W_VL:
17311 case RISCVISD::VWSUBU_W_VL:
17312 return RISCVISD::VWSUBU_VL;
17313 case ISD::MUL:
17314 case RISCVISD::MUL_VL:
17315 return RISCVISD::VWMULU_VL;
17316 case ISD::SHL:
17317 case RISCVISD::SHL_VL:
17318 return RISCVISD::VWSLL_VL;
17319 default:
17320 llvm_unreachable("Unexpected opcode");
17321 }
17322 }
17323
17324 /// Get the opcode to materialize:
17325 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
17326 static unsigned getFPExtOpcode(unsigned Opcode) {
17327 switch (Opcode) {
17328 case RISCVISD::FADD_VL:
17329 case RISCVISD::VFWADD_W_VL:
17330 return RISCVISD::VFWADD_VL;
17331 case RISCVISD::FSUB_VL:
17332 case RISCVISD::VFWSUB_W_VL:
17333 return RISCVISD::VFWSUB_VL;
17334 case RISCVISD::FMUL_VL:
17335 return RISCVISD::VFWMUL_VL;
17336 case RISCVISD::VFMADD_VL:
17337 return RISCVISD::VFWMADD_VL;
17338 case RISCVISD::VFMSUB_VL:
17339 return RISCVISD::VFWMSUB_VL;
17340 case RISCVISD::VFNMADD_VL:
17341 return RISCVISD::VFWNMADD_VL;
17342 case RISCVISD::VFNMSUB_VL:
17343 return RISCVISD::VFWNMSUB_VL;
17344 default:
17345 llvm_unreachable("Unexpected opcode");
17346 }
17347 }
17348
17349 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
17350 /// newOpcode(a, b).
17351 static unsigned getSUOpcode(unsigned Opcode) {
17352 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
17353 "SU is only supported for MUL");
17354 return RISCVISD::VWMULSU_VL;
17355 }
17356
17357 /// Get the opcode to materialize
17358 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
17359 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
17360 switch (Opcode) {
17361 case ISD::ADD:
17362 case RISCVISD::ADD_VL:
17363 case ISD::OR:
17364 case RISCVISD::OR_VL:
17365 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
17366 : RISCVISD::VWADDU_W_VL;
17367 case ISD::SUB:
17368 case RISCVISD::SUB_VL:
17369 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
17370 : RISCVISD::VWSUBU_W_VL;
17371 case RISCVISD::FADD_VL:
17372 return RISCVISD::VFWADD_W_VL;
17373 case RISCVISD::FSUB_VL:
17374 return RISCVISD::VFWSUB_W_VL;
17375 default:
17376 llvm_unreachable("Unexpected opcode");
17377 }
17378 }
17379
17380 using CombineToTry = std::function<std::optional<CombineResult>(
17381 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
17382 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
17383 const RISCVSubtarget &)>;
17384
17385 /// Check if this node needs to be fully folded or extended for all users.
17386 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
17387
17388 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
17389 const RISCVSubtarget &Subtarget) {
17390 unsigned Opc = OrigOperand.getOpcode();
17391 MVT VT = OrigOperand.getSimpleValueType();
17392
17393 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
17394 "Unexpected Opcode");
17395
17396 // The passthru must be undef for tail agnostic.
17397 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
17398 return;
17399
17400 // Get the scalar value.
17401 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
17402 : OrigOperand.getOperand(1);
17403
17404 // See if we have enough sign bits or zero bits in the scalar to use a
17405 // widening opcode by splatting to smaller element size.
17406 unsigned EltBits = VT.getScalarSizeInBits();
17407 unsigned ScalarBits = Op.getValueSizeInBits();
17408 // If we're not getting all bits from the element, we need special handling.
17409 if (ScalarBits < EltBits) {
17410 // This should only occur on RV32.
17411 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
17412 !Subtarget.is64Bit() && "Unexpected splat");
17413 // vmv.v.x sign extends narrow inputs.
17414 SupportsSExt = true;
17415
17416 // If the input is positive, then sign extend is also zero extend.
17417 if (DAG.SignBitIsZero(Op))
17418 SupportsZExt = true;
17419
17420 EnforceOneUse = false;
17421 return;
17422 }
17423
17424 unsigned NarrowSize = EltBits / 2;
17425 // If the narrow type cannot be expressed with a legal VMV,
17426 // this is not a valid candidate.
17427 if (NarrowSize < 8)
17428 return;
17429
17430 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
17431 SupportsSExt = true;
17432
17433 if (DAG.MaskedValueIsZero(Op,
17434 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
17435 SupportsZExt = true;
17436
17437 EnforceOneUse = false;
17438 }
17439
17440 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17441 return (NarrowEltVT == MVT::f32 ||
17442 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
17443 }
17444
17445 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17446 return NarrowEltVT == MVT::bf16 && Subtarget.hasStdExtZvfbfwma();
17447 }
17448
17449 /// Helper method to set the various fields of this struct based on the
17450 /// type of \p Root.
17451 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
17452 const RISCVSubtarget &Subtarget) {
17453 SupportsZExt = false;
17454 SupportsSExt = false;
17455 SupportsFPExt = false;
17456 SupportsBF16Ext = false;
17457 EnforceOneUse = true;
17458 unsigned Opc = OrigOperand.getOpcode();
17459 // For the nodes we handle below, we end up using their inputs directly: see
17460 // getSource(). However since they either don't have a passthru or we check
17461 // that their passthru is undef, we can safely ignore their mask and VL.
17462 switch (Opc) {
17463 case ISD::ZERO_EXTEND:
17464 case ISD::SIGN_EXTEND: {
17465 MVT VT = OrigOperand.getSimpleValueType();
17466 if (!VT.isVector())
17467 break;
17468
17469 SDValue NarrowElt = OrigOperand.getOperand(0);
17470 MVT NarrowVT = NarrowElt.getSimpleValueType();
17471 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
17472 if (NarrowVT.getVectorElementType() == MVT::i1)
17473 break;
17474
17475 SupportsZExt = Opc == ISD::ZERO_EXTEND;
17476 SupportsSExt = Opc == ISD::SIGN_EXTEND;
17477 break;
17478 }
17479 case RISCVISD::VZEXT_VL:
17480 SupportsZExt = true;
17481 break;
17482 case RISCVISD::VSEXT_VL:
17483 SupportsSExt = true;
17484 break;
17485 case RISCVISD::FP_EXTEND_VL: {
17486 MVT NarrowEltVT =
17487 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
17488 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
17489 SupportsFPExt = true;
17490 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
17491 SupportsBF16Ext = true;
17492
17493 break;
17494 }
17495 case ISD::SPLAT_VECTOR:
17496 case RISCVISD::VMV_V_X_VL:
17497 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
17498 break;
17499 case RISCVISD::VFMV_V_F_VL: {
17500 MVT VT = OrigOperand.getSimpleValueType();
17501
17502 if (!OrigOperand.getOperand(0).isUndef())
17503 break;
17504
17505 SDValue Op = OrigOperand.getOperand(1);
17506 if (Op.getOpcode() != ISD::FP_EXTEND)
17507 break;
17508
17509 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17510 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
17511 if (NarrowSize != ScalarBits)
17512 break;
17513
17514 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
17515 SupportsFPExt = true;
17516 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
17517 Subtarget))
17518 SupportsBF16Ext = true;
17519 break;
17520 }
17521 default:
17522 break;
17523 }
17524 }
17525
17526 /// Check if \p Root supports any extension folding combines.
17527 static bool isSupportedRoot(const SDNode *Root,
17528 const RISCVSubtarget &Subtarget) {
17529 switch (Root->getOpcode()) {
17530 case ISD::ADD:
17531 case ISD::SUB:
17532 case ISD::MUL: {
17533 return Root->getValueType(0).isScalableVector();
17534 }
17535 case ISD::OR: {
17536 return Root->getValueType(0).isScalableVector() &&
17537 Root->getFlags().hasDisjoint();
17538 }
17539 // Vector Widening Integer Add/Sub/Mul Instructions
17540 case RISCVISD::ADD_VL:
17541 case RISCVISD::MUL_VL:
17542 case RISCVISD::VWADD_W_VL:
17543 case RISCVISD::VWADDU_W_VL:
17544 case RISCVISD::SUB_VL:
17545 case RISCVISD::VWSUB_W_VL:
17546 case RISCVISD::VWSUBU_W_VL:
17547 // Vector Widening Floating-Point Add/Sub/Mul Instructions
17548 case RISCVISD::FADD_VL:
17549 case RISCVISD::FSUB_VL:
17550 case RISCVISD::FMUL_VL:
17551 case RISCVISD::VFWADD_W_VL:
17552 case RISCVISD::VFWSUB_W_VL:
17553 return true;
17554 case RISCVISD::OR_VL:
17555 return Root->getFlags().hasDisjoint();
17556 case ISD::SHL:
17557 return Root->getValueType(0).isScalableVector() &&
17558 Subtarget.hasStdExtZvbb();
17559 case RISCVISD::SHL_VL:
17560 return Subtarget.hasStdExtZvbb();
17561 case RISCVISD::VFMADD_VL:
17562 case RISCVISD::VFNMSUB_VL:
17563 case RISCVISD::VFNMADD_VL:
17564 case RISCVISD::VFMSUB_VL:
17565 return true;
17566 default:
17567 return false;
17568 }
17569 }
17570
17571 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
17572 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
17573 const RISCVSubtarget &Subtarget) {
17574 assert(isSupportedRoot(Root, Subtarget) &&
17575 "Trying to build an helper with an "
17576 "unsupported root");
17577 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
17579 OrigOperand = Root->getOperand(OperandIdx);
17580
17581 unsigned Opc = Root->getOpcode();
17582 switch (Opc) {
17583 // We consider
17584 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
17585 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
17586 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
17587 case RISCVISD::VWADD_W_VL:
17588 case RISCVISD::VWADDU_W_VL:
17589 case RISCVISD::VWSUB_W_VL:
17590 case RISCVISD::VWSUBU_W_VL:
17591 case RISCVISD::VFWADD_W_VL:
17592 case RISCVISD::VFWSUB_W_VL:
17593 // Operand 1 can't be changed.
17594 if (OperandIdx == 1)
17595 break;
17596 [[fallthrough]];
17597 default:
17598 fillUpExtensionSupport(Root, DAG, Subtarget);
17599 break;
17600 }
17601 }
17602
17603 /// Helper function to get the Mask and VL from \p Root.
17604 static std::pair<SDValue, SDValue>
17605 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
17606 const RISCVSubtarget &Subtarget) {
17607 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
17608 switch (Root->getOpcode()) {
17609 case ISD::ADD:
17610 case ISD::SUB:
17611 case ISD::MUL:
17612 case ISD::OR:
17613 case ISD::SHL: {
17614 SDLoc DL(Root);
17615 MVT VT = Root->getSimpleValueType(0);
17616 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
17617 }
17618 default:
17619 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
17620 }
17621 }
17622
17623 /// Helper function to check if \p N is commutative with respect to the
17624 /// foldings that are supported by this class.
17625 static bool isCommutative(const SDNode *N) {
17626 switch (N->getOpcode()) {
17627 case ISD::ADD:
17628 case ISD::MUL:
17629 case ISD::OR:
17630 case RISCVISD::ADD_VL:
17631 case RISCVISD::MUL_VL:
17632 case RISCVISD::OR_VL:
17633 case RISCVISD::FADD_VL:
17634 case RISCVISD::FMUL_VL:
17635 case RISCVISD::VFMADD_VL:
17636 case RISCVISD::VFNMSUB_VL:
17637 case RISCVISD::VFNMADD_VL:
17638 case RISCVISD::VFMSUB_VL:
17639 return true;
17640 case RISCVISD::VWADD_W_VL:
17641 case RISCVISD::VWADDU_W_VL:
17642 case ISD::SUB:
17643 case RISCVISD::SUB_VL:
17644 case RISCVISD::VWSUB_W_VL:
17645 case RISCVISD::VWSUBU_W_VL:
17646 case RISCVISD::VFWADD_W_VL:
17647 case RISCVISD::FSUB_VL:
17648 case RISCVISD::VFWSUB_W_VL:
17649 case ISD::SHL:
17650 case RISCVISD::SHL_VL:
17651 return false;
17652 default:
17653 llvm_unreachable("Unexpected opcode");
17654 }
17655 }
17656
17657 /// Get a list of combine to try for folding extensions in \p Root.
17658 /// Note that each returned CombineToTry function doesn't actually modify
17659 /// anything. Instead they produce an optional CombineResult that if not None,
17660 /// need to be materialized for the combine to be applied.
17661 /// \see CombineResult::materialize.
17662 /// If the related CombineToTry function returns std::nullopt, that means the
17663 /// combine didn't match.
17664 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
17665};
17666
17667/// Helper structure that holds all the necessary information to materialize a
17668/// combine that does some extension folding.
17669struct CombineResult {
17670 /// Opcode to be generated when materializing the combine.
17671 unsigned TargetOpcode;
17672 // No value means no extension is needed.
17673 std::optional<ExtKind> LHSExt;
17674 std::optional<ExtKind> RHSExt;
17675 /// Root of the combine.
17676 SDNode *Root;
17677 /// LHS of the TargetOpcode.
17678 NodeExtensionHelper LHS;
17679 /// RHS of the TargetOpcode.
17680 NodeExtensionHelper RHS;
17681
17682 CombineResult(unsigned TargetOpcode, SDNode *Root,
17683 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
17684 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
17685 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
17686 LHS(LHS), RHS(RHS) {}
17687
17688 /// Return a value that uses TargetOpcode and that can be used to replace
17689 /// Root.
17690 /// The actual replacement is *not* done in that method.
17691 SDValue materialize(SelectionDAG &DAG,
17692 const RISCVSubtarget &Subtarget) const {
17693 SDValue Mask, VL, Passthru;
17694 std::tie(Mask, VL) =
17695 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
17696 switch (Root->getOpcode()) {
17697 default:
17698 Passthru = Root->getOperand(2);
17699 break;
17700 case ISD::ADD:
17701 case ISD::SUB:
17702 case ISD::MUL:
17703 case ISD::OR:
17704 case ISD::SHL:
17705 Passthru = DAG.getUNDEF(Root->getValueType(0));
17706 break;
17707 }
17708 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
17709 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
17710 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
17711 Passthru, Mask, VL);
17712 }
17713};
17714
17715/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17716/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17717/// are zext) and LHS and RHS can be folded into Root.
17718/// AllowExtMask define which form `ext` can take in this pattern.
17719///
17720/// \note If the pattern can match with both zext and sext, the returned
17721/// CombineResult will feature the zext result.
17722///
17723/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17724/// can be used to apply the pattern.
17725static std::optional<CombineResult>
17726canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
17727 const NodeExtensionHelper &RHS,
17728 uint8_t AllowExtMask, SelectionDAG &DAG,
17729 const RISCVSubtarget &Subtarget) {
17730 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
17731 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17732 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17733 /*RHSExt=*/{ExtKind::ZExt});
17734 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
17735 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17736 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17737 /*RHSExt=*/{ExtKind::SExt});
17738 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
17739 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17740 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17741 /*RHSExt=*/{ExtKind::FPExt});
17742 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
17743 RHS.SupportsBF16Ext)
17744 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17745 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
17746 /*RHSExt=*/{ExtKind::BF16Ext});
17747 return std::nullopt;
17748}
17749
17750/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
17751/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
17752/// are zext) and LHS and RHS can be folded into Root.
17753///
17754/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17755/// can be used to apply the pattern.
17756static std::optional<CombineResult>
17757canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
17758 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17759 const RISCVSubtarget &Subtarget) {
17760 return canFoldToVWWithSameExtensionImpl(
17761 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
17762 Subtarget);
17763}
17764
17765/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
17766///
17767/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17768/// can be used to apply the pattern.
17769static std::optional<CombineResult>
17770canFoldToVWWithSameExtZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17771 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17772 const RISCVSubtarget &Subtarget) {
17773 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
17774 Subtarget);
17775}
17776
17777/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
17778///
17779/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17780/// can be used to apply the pattern.
17781static std::optional<CombineResult>
17782canFoldToVWWithSameExtBF16(SDNode *Root, const NodeExtensionHelper &LHS,
17783 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17784 const RISCVSubtarget &Subtarget) {
17785 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
17786 Subtarget);
17787}
17788
17789/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
17790///
17791/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17792/// can be used to apply the pattern.
17793static std::optional<CombineResult>
17794canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
17795 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17796 const RISCVSubtarget &Subtarget) {
17797 if (RHS.SupportsFPExt)
17798 return CombineResult(
17799 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
17800 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
17801
17802 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
17803 // sext/zext?
17804 // Control this behavior behind an option (AllowSplatInVW_W) for testing
17805 // purposes.
17806 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
17807 return CombineResult(
17808 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
17809 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
17810 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
17811 return CombineResult(
17812 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
17813 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
17814 return std::nullopt;
17815}
17816
17817/// Check if \p Root follows a pattern Root(sext(LHS), RHS)
17818///
17819/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17820/// can be used to apply the pattern.
17821static std::optional<CombineResult>
17822canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17823 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17824 const RISCVSubtarget &Subtarget) {
17825 if (LHS.SupportsSExt)
17826 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
17827 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17828 /*RHSExt=*/std::nullopt);
17829 return std::nullopt;
17830}
17831
17832/// Check if \p Root follows a pattern Root(zext(LHS), RHS)
17833///
17834/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17835/// can be used to apply the pattern.
17836static std::optional<CombineResult>
17837canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17838 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17839 const RISCVSubtarget &Subtarget) {
17840 if (LHS.SupportsZExt)
17841 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
17842 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
17843 /*RHSExt=*/std::nullopt);
17844 return std::nullopt;
17845}
17846
17847/// Check if \p Root follows a pattern Root(fpext(LHS), RHS)
17848///
17849/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17850/// can be used to apply the pattern.
17851static std::optional<CombineResult>
17852canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
17853 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17854 const RISCVSubtarget &Subtarget) {
17855 if (LHS.SupportsFPExt)
17856 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
17857 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
17858 /*RHSExt=*/std::nullopt);
17859 return std::nullopt;
17860}
17861
17862/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
17863///
17864/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
17865/// can be used to apply the pattern.
17866static std::optional<CombineResult>
17867canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
17868 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
17869 const RISCVSubtarget &Subtarget) {
17870
17871 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
17872 return std::nullopt;
17873 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
17874 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
17875 /*RHSExt=*/{ExtKind::ZExt});
17876}
17877
17878SmallVector<NodeExtensionHelper::CombineToTry>
17879NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
17880 SmallVector<CombineToTry> Strategies;
17881 switch (Root->getOpcode()) {
17882 case ISD::ADD:
17883 case ISD::SUB:
17884 case ISD::OR:
17885 case RISCVISD::ADD_VL:
17886 case RISCVISD::SUB_VL:
17887 case RISCVISD::OR_VL:
17888 case RISCVISD::FADD_VL:
17889 case RISCVISD::FSUB_VL:
17890 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
17891 Strategies.push_back(canFoldToVWWithSameExtension);
17892 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
17893 Strategies.push_back(canFoldToVW_W);
17894 break;
17895 case RISCVISD::FMUL_VL:
17896 case RISCVISD::VFMADD_VL:
17897 case RISCVISD::VFMSUB_VL:
17898 case RISCVISD::VFNMADD_VL:
17899 case RISCVISD::VFNMSUB_VL:
17900 Strategies.push_back(canFoldToVWWithSameExtension);
17901 if (Root->getOpcode() == RISCVISD::VFMADD_VL)
17902 Strategies.push_back(canFoldToVWWithSameExtBF16);
17903 break;
17904 case ISD::MUL:
17905 case RISCVISD::MUL_VL:
17906 // mul -> vwmul(u)
17907 Strategies.push_back(canFoldToVWWithSameExtension);
17908 // mul -> vwmulsu
17909 Strategies.push_back(canFoldToVW_SU);
17910 break;
17911 case ISD::SHL:
17912 case RISCVISD::SHL_VL:
17913 // shl -> vwsll
17914 Strategies.push_back(canFoldToVWWithSameExtZEXT);
17915 break;
17916 case RISCVISD::VWADD_W_VL:
17917 case RISCVISD::VWSUB_W_VL:
17918 // vwadd_w|vwsub_w -> vwadd|vwsub
17919 Strategies.push_back(canFoldToVWWithSEXT);
17920 break;
17921 case RISCVISD::VWADDU_W_VL:
17922 case RISCVISD::VWSUBU_W_VL:
17923 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
17924 Strategies.push_back(canFoldToVWWithZEXT);
17925 break;
17926 case RISCVISD::VFWADD_W_VL:
17927 case RISCVISD::VFWSUB_W_VL:
17928 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
17929 Strategies.push_back(canFoldToVWWithFPEXT);
17930 break;
17931 default:
17932 llvm_unreachable("Unexpected opcode");
17933 }
17934 return Strategies;
17935}
17936} // End anonymous namespace.
17937
17939 // TODO: Extend this to other binops using generic identity logic
17940 assert(N->getOpcode() == RISCVISD::ADD_VL);
17941 SDValue A = N->getOperand(0);
17942 SDValue B = N->getOperand(1);
17943 SDValue Passthru = N->getOperand(2);
17944 if (!Passthru.isUndef())
17945 // TODO: This could be a vmerge instead
17946 return SDValue();
17947
17948 if (ISD::isConstantSplatVectorAllZeros(B.getNode()))
17949 return A;
17950 // Peek through fixed to scalable
17951 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
17952 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
17953 return A;
17954 return SDValue();
17955}
17956
17957/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
17958/// The supported combines are:
17959/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
17960/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17961/// mul | mul_vl -> vwmul(u) | vwmul_su
17962/// shl | shl_vl -> vwsll
17963/// fadd_vl -> vfwadd | vfwadd_w
17964/// fsub_vl -> vfwsub | vfwsub_w
17965/// fmul_vl -> vfwmul
17966/// vwadd_w(u) -> vwadd(u)
17967/// vwsub_w(u) -> vwsub(u)
17968/// vfwadd_w -> vfwadd
17969/// vfwsub_w -> vfwsub
17972 const RISCVSubtarget &Subtarget) {
17973 SelectionDAG &DAG = DCI.DAG;
17974 if (DCI.isBeforeLegalize())
17975 return SDValue();
17976
17977 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
17978 return SDValue();
17979
17980 SmallVector<SDNode *> Worklist;
17981 SmallPtrSet<SDNode *, 8> Inserted;
17982 SmallPtrSet<SDNode *, 8> ExtensionsToRemove;
17983 Worklist.push_back(N);
17984 Inserted.insert(N);
17985 SmallVector<CombineResult> CombinesToApply;
17986
17987 while (!Worklist.empty()) {
17988 SDNode *Root = Worklist.pop_back_val();
17989
17990 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
17991 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
17992 auto AppendUsersIfNeeded =
17993 [&Worklist, &Subtarget, &Inserted,
17994 &ExtensionsToRemove](const NodeExtensionHelper &Op) {
17995 if (Op.needToPromoteOtherUsers()) {
17996 // Remember that we're supposed to remove this extension.
17997 ExtensionsToRemove.insert(Op.OrigOperand.getNode());
17998 for (SDUse &Use : Op.OrigOperand->uses()) {
17999 SDNode *TheUser = Use.getUser();
18000 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
18001 return false;
18002 // We only support the first 2 operands of FMA.
18003 if (Use.getOperandNo() >= 2)
18004 return false;
18005 if (Inserted.insert(TheUser).second)
18006 Worklist.push_back(TheUser);
18007 }
18008 }
18009 return true;
18010 };
18011
18012 // Control the compile time by limiting the number of nodes we look at in
18013 // total.
18014 if (Inserted.size() > ExtensionMaxWebSize)
18015 return SDValue();
18016
18017 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
18018 NodeExtensionHelper::getSupportedFoldings(Root);
18019
18020 assert(!FoldingStrategies.empty() && "Nothing to be folded");
18021 bool Matched = false;
18022 for (int Attempt = 0;
18023 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
18024 ++Attempt) {
18025
18026 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
18027 FoldingStrategies) {
18028 std::optional<CombineResult> Res =
18029 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
18030 if (Res) {
18031 // If this strategy wouldn't remove an extension we're supposed to
18032 // remove, reject it.
18033 if (!Res->LHSExt.has_value() &&
18034 ExtensionsToRemove.contains(LHS.OrigOperand.getNode()))
18035 continue;
18036 if (!Res->RHSExt.has_value() &&
18037 ExtensionsToRemove.contains(RHS.OrigOperand.getNode()))
18038 continue;
18039
18040 Matched = true;
18041 CombinesToApply.push_back(*Res);
18042 // All the inputs that are extended need to be folded, otherwise
18043 // we would be leaving the old input (since it may still be used),
18044 // and the new one.
18045 if (Res->LHSExt.has_value())
18046 if (!AppendUsersIfNeeded(LHS))
18047 return SDValue();
18048 if (Res->RHSExt.has_value())
18049 if (!AppendUsersIfNeeded(RHS))
18050 return SDValue();
18051 break;
18052 }
18053 }
18054 std::swap(LHS, RHS);
18055 }
18056 // Right now we do an all-or-nothing approach.
18057 if (!Matched)
18058 return SDValue();
18059 }
18060 // Store the value for the replacement of the input node separately.
18061 SDValue InputRootReplacement;
18062 // We do the RAUW after we materialize all the combines, because some replaced
18063 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
18064 // some of these nodes may appear in the NodeExtensionHelpers of some of the
18065 // yet-to-be-visited CombinesToApply roots.
18066 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
18067 ValuesToReplace.reserve(CombinesToApply.size());
18068 for (CombineResult Res : CombinesToApply) {
18069 SDValue NewValue = Res.materialize(DAG, Subtarget);
18070 if (!InputRootReplacement) {
18071 assert(Res.Root == N &&
18072 "First element is expected to be the current node");
18073 InputRootReplacement = NewValue;
18074 } else {
18075 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
18076 }
18077 }
18078 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
18079 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
18080 DCI.AddToWorklist(OldNewValues.second.getNode());
18081 }
18082 return InputRootReplacement;
18083}
18084
18085// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
18086// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
18087// y will be the Passthru and cond will be the Mask.
18088static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
18089 unsigned Opc = N->getOpcode();
18090 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
18091 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
18092
18093 SDValue Y = N->getOperand(0);
18094 SDValue MergeOp = N->getOperand(1);
18095 unsigned MergeOpc = MergeOp.getOpcode();
18096
18097 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
18098 return SDValue();
18099
18100 SDValue X = MergeOp->getOperand(1);
18101
18102 if (!MergeOp.hasOneUse())
18103 return SDValue();
18104
18105 // Passthru should be undef
18106 SDValue Passthru = N->getOperand(2);
18107 if (!Passthru.isUndef())
18108 return SDValue();
18109
18110 // Mask should be all ones
18111 SDValue Mask = N->getOperand(3);
18112 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
18113 return SDValue();
18114
18115 // False value of MergeOp should be all zeros
18116 SDValue Z = MergeOp->getOperand(2);
18117
18118 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
18119 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
18120 Z = Z.getOperand(1);
18121
18122 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
18123 return SDValue();
18124
18125 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
18126 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
18127 N->getFlags());
18128}
18129
18130static SDValue performVWADDSUBW_VLCombine(SDNode *N,
18131 TargetLowering::DAGCombinerInfo &DCI,
18132 const RISCVSubtarget &Subtarget) {
18133 [[maybe_unused]] unsigned Opc = N->getOpcode();
18134 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
18135 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
18136
18137 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18138 return V;
18139
18140 return combineVWADDSUBWSelect(N, DCI.DAG);
18141}
18142
18143// Helper function for performMemPairCombine.
18144// Try to combine the memory loads/stores LSNode1 and LSNode2
18145// into a single memory pair operation.
18146static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
18147 LSBaseSDNode *LSNode2, SDValue BasePtr,
18148 uint64_t Imm) {
18149 SmallPtrSet<const SDNode *, 32> Visited;
18150 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
18151
18152 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
18153 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
18154 return SDValue();
18155
18156 MachineFunction &MF = DAG.getMachineFunction();
18157 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
18158
18159 // The new operation has twice the width.
18160 MVT XLenVT = Subtarget.getXLenVT();
18161 EVT MemVT = LSNode1->getMemoryVT();
18162 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
18163 MachineMemOperand *MMO = LSNode1->getMemOperand();
18164 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
18165 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
18166
18167 if (LSNode1->getOpcode() == ISD::LOAD) {
18168 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
18169 unsigned Opcode;
18170 if (MemVT == MVT::i32)
18171 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
18172 else
18173 Opcode = RISCVISD::TH_LDD;
18174
18175 SDValue Res = DAG.getMemIntrinsicNode(
18176 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
18177 {LSNode1->getChain(), BasePtr,
18178 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
18179 NewMemVT, NewMMO);
18180
18181 SDValue Node1 =
18182 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
18183 SDValue Node2 =
18184 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
18185
18186 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
18187 return Node1;
18188 } else {
18189 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
18190
18191 SDValue Res = DAG.getMemIntrinsicNode(
18192 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
18193 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
18194 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
18195 NewMemVT, NewMMO);
18196
18197 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
18198 return Res;
18199 }
18200}
18201
18202// Try to combine two adjacent loads/stores to a single pair instruction from
18203// the XTHeadMemPair vendor extension.
18204static SDValue performMemPairCombine(SDNode *N,
18205 TargetLowering::DAGCombinerInfo &DCI) {
18206 SelectionDAG &DAG = DCI.DAG;
18207 MachineFunction &MF = DAG.getMachineFunction();
18208 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
18209
18210 // Target does not support load/store pair.
18211 if (!Subtarget.hasVendorXTHeadMemPair())
18212 return SDValue();
18213
18214 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
18215 EVT MemVT = LSNode1->getMemoryVT();
18216 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
18217
18218 // No volatile, indexed or atomic loads/stores.
18219 if (!LSNode1->isSimple() || LSNode1->isIndexed())
18220 return SDValue();
18221
18222 // Function to get a base + constant representation from a memory value.
18223 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
18224 if (Ptr->getOpcode() == ISD::ADD)
18225 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
18226 return {Ptr->getOperand(0), C1->getZExtValue()};
18227 return {Ptr, 0};
18228 };
18229
18230 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
18231
18232 SDValue Chain = N->getOperand(0);
18233 for (SDUse &Use : Chain->uses()) {
18234 if (Use.getUser() != N && Use.getResNo() == 0 &&
18235 Use.getUser()->getOpcode() == N->getOpcode()) {
18236 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
18237
18238 // No volatile, indexed or atomic loads/stores.
18239 if (!LSNode2->isSimple() || LSNode2->isIndexed())
18240 continue;
18241
18242 // Check if LSNode1 and LSNode2 have the same type and extension.
18243 if (LSNode1->getOpcode() == ISD::LOAD)
18244 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
18245 cast<LoadSDNode>(LSNode1)->getExtensionType())
18246 continue;
18247
18248 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
18249 continue;
18250
18251 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
18252
18253 // Check if the base pointer is the same for both instructions.
18254 if (Base1 != Base2)
18255 continue;
18256
18257 // Check if the offsets match the XTHeadMemPair encoding constraints.
18258 bool Valid = false;
18259 if (MemVT == MVT::i32) {
18260 // Check for adjacent i32 values and a 2-bit index.
18261 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
18262 Valid = true;
18263 } else if (MemVT == MVT::i64) {
18264 // Check for adjacent i64 values and a 2-bit index.
18265 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
18266 Valid = true;
18267 }
18268
18269 if (!Valid)
18270 continue;
18271
18272 // Try to combine.
18273 if (SDValue Res =
18274 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
18275 return Res;
18276 }
18277 }
18278
18279 return SDValue();
18280}
18281
18282// Fold
18283// (fp_to_int (froundeven X)) -> fcvt X, rne
18284// (fp_to_int (ftrunc X)) -> fcvt X, rtz
18285// (fp_to_int (ffloor X)) -> fcvt X, rdn
18286// (fp_to_int (fceil X)) -> fcvt X, rup
18287// (fp_to_int (fround X)) -> fcvt X, rmm
18288// (fp_to_int (frint X)) -> fcvt X
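// For example (illustrative): on RV64 with F, (fp_to_sint (ffloor f32:X))
// can be selected as a single "fcvt.w.s rd, fs, rdn" instead of rounding to
// a float first and then converting, because the scalar FCVT instructions
// take a static rounding mode.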
18289static SDValue performFP_TO_INTCombine(SDNode *N,
18290 TargetLowering::DAGCombinerInfo &DCI,
18291 const RISCVSubtarget &Subtarget) {
18292 SelectionDAG &DAG = DCI.DAG;
18293 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18294 MVT XLenVT = Subtarget.getXLenVT();
18295
18296 SDValue Src = N->getOperand(0);
18297
18298 // Don't do this for strict-fp Src.
18299 if (Src->isStrictFPOpcode())
18300 return SDValue();
18301
18302 // Ensure the FP type is legal.
18303 if (!TLI.isTypeLegal(Src.getValueType()))
18304 return SDValue();
18305
18306 // Don't do this for f16 with Zfhmin and not Zfh.
18307 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18308 return SDValue();
18309
18310 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18311 // If the result is invalid, we didn't find a foldable instruction.
18312 if (FRM == RISCVFPRndMode::Invalid)
18313 return SDValue();
18314
18315 SDLoc DL(N);
18316 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
18317 EVT VT = N->getValueType(0);
18318
18319 if (VT.isVector() && TLI.isTypeLegal(VT)) {
18320 MVT SrcVT = Src.getSimpleValueType();
18321 MVT SrcContainerVT = SrcVT;
18322 MVT ContainerVT = VT.getSimpleVT();
18323 SDValue XVal = Src.getOperand(0);
18324
18325 // For widening and narrowing conversions we just combine it into a
18326 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
18327 // end up getting lowered to their appropriate pseudo instructions based on
18328 // their operand types
18329 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
18330 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
18331 return SDValue();
18332
18333 // Make fixed-length vectors scalable first
18334 if (SrcVT.isFixedLengthVector()) {
18335 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
18336 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
18337 ContainerVT =
18338 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
18339 }
18340
18341 auto [Mask, VL] =
18342 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
18343
18344 SDValue FpToInt;
18345 if (FRM == RISCVFPRndMode::RTZ) {
18346 // Use the dedicated trunc static rounding mode if we're truncating so we
18347 // don't need to generate calls to fsrmi/fsrm
18348 unsigned Opc =
18349 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
18350 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
18351 } else {
18352 unsigned Opc =
18353 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
18354 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
18355 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
18356 }
18357
18358 // If converted from fixed-length to scalable, convert back
18359 if (VT.isFixedLengthVector())
18360 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
18361
18362 return FpToInt;
18363 }
18364
18365 // Only handle XLen or i32 types. Other types narrower than XLen will
18366 // eventually be legalized to XLenVT.
18367 if (VT != MVT::i32 && VT != XLenVT)
18368 return SDValue();
18369
18370 unsigned Opc;
18371 if (VT == XLenVT)
18372 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18373 else
18374 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18375
18376 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
18377 DAG.getTargetConstant(FRM, DL, XLenVT));
18378 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
18379}
18380
18381// Fold
18382// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
18383// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
18384// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
18385// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
18386// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
18387// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
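// Illustrative scalar sequence for (fp_to_sint_sat f32:X to i32) with a
// matched rounding op (register names are placeholders), roughly:
//   fcvt.w.s a0, fa0, <rm>  # convert with static rounding; NaN gives 2^31-1
//   feq.s    a1, fa0, fa0   # a1 == 0 iff X is NaN
//   neg      a1, a1
//   and      a0, a0, a1     # force the NaN case to 0
// which realizes the (select X == nan, 0, fcvt) form built below.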
18388static SDValue performFP_TO_INT_SATCombine(SDNode *N,
18389 TargetLowering::DAGCombinerInfo &DCI,
18390 const RISCVSubtarget &Subtarget) {
18391 SelectionDAG &DAG = DCI.DAG;
18392 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18393 MVT XLenVT = Subtarget.getXLenVT();
18394
18395 // Only handle XLen types. Other types narrower than XLen will eventually be
18396 // legalized to XLenVT.
18397 EVT DstVT = N->getValueType(0);
18398 if (DstVT != XLenVT)
18399 return SDValue();
18400
18401 SDValue Src = N->getOperand(0);
18402
18403 // Don't do this for strict-fp Src.
18404 if (Src->isStrictFPOpcode())
18405 return SDValue();
18406
18407 // Ensure the FP type is also legal.
18408 if (!TLI.isTypeLegal(Src.getValueType()))
18409 return SDValue();
18410
18411 // Don't do this for f16 with Zfhmin and not Zfh.
18412 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18413 return SDValue();
18414
18415 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18416
18417 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18418 if (FRM == RISCVFPRndMode::Invalid)
18419 return SDValue();
18420
18421 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
18422
18423 unsigned Opc;
18424 if (SatVT == DstVT)
18425 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18426 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
18427 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18428 else
18429 return SDValue();
18430 // FIXME: Support other SatVTs by clamping before or after the conversion.
18431
18432 Src = Src.getOperand(0);
18433
18434 SDLoc DL(N);
18435 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
18436 DAG.getTargetConstant(FRM, DL, XLenVT));
18437
18438 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
18439 // extend.
18440 if (Opc == RISCVISD::FCVT_WU_RV64)
18441 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
18442
18443 // RISC-V FP-to-int conversions saturate to the destination register size, but
18444 // don't produce 0 for nan.
18445 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
18446 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
18447}
18448
18449// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
18450// smaller than XLenVT.
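// Illustrative note: bswap reverses byte order and bitreverse reverses all
// bits, so their composition reverses the bits within each byte. That is
// exactly what BREV8 (GREVI with shamt 7 from Zbkb) computes, so the pair of
// nodes collapses to a single RISCVISD::BREV8 below.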
18451static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
18452 const RISCVSubtarget &Subtarget) {
18453 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
18454
18455 SDValue Src = N->getOperand(0);
18456 if (Src.getOpcode() != ISD::BSWAP)
18457 return SDValue();
18458
18459 EVT VT = N->getValueType(0);
18460 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
18461 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
18462 return SDValue();
18463
18464 SDLoc DL(N);
18465 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
18466}
18467
18468static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
18469 const RISCVSubtarget &Subtarget) {
18470 // Fold:
18471 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
18472
18473 // Check if its first operand is a vp.load.
18474 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
18475 if (!VPLoad)
18476 return SDValue();
18477
18478 EVT LoadVT = VPLoad->getValueType(0);
18479 // We do not have a strided_load version for masks, and the evl of vp.reverse
18480 // and vp.load should always be the same.
18481 if (!LoadVT.getVectorElementType().isByteSized() ||
18482 N->getOperand(2) != VPLoad->getVectorLength() ||
18483 !N->getOperand(0).hasOneUse())
18484 return SDValue();
18485
18486 // Check if the mask of the outer vp.reverse is all ones.
18487 if (!isOneOrOneSplat(N->getOperand(1)))
18488 return SDValue();
18489
18490 SDValue LoadMask = VPLoad->getMask();
18491 // If Mask is all ones, then load is unmasked and can be reversed.
18492 if (!isOneOrOneSplat(LoadMask)) {
18493 // If the mask is not all ones, we can reverse the load if the mask was also
18494 // reversed by an unmasked vp.reverse with the same EVL.
18495 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18496 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
18497 LoadMask.getOperand(2) != VPLoad->getVectorLength())
18498 return SDValue();
18499 LoadMask = LoadMask.getOperand(0);
18500 }
18501
18502 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
18503 SDLoc DL(N);
18504 MVT XLenVT = Subtarget.getXLenVT();
18505 SDValue NumElem = VPLoad->getVectorLength();
18506 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
18507
18508 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18509 DAG.getConstant(1, DL, XLenVT));
18510 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18511 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18512 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
18513 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18514
18515 MachineFunction &MF = DAG.getMachineFunction();
18516 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
18517 MachineMemOperand *MMO = MF.getMachineMemOperand(
18518 PtrInfo, VPLoad->getMemOperand()->getFlags(),
18519 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
18520
18521 SDValue Ret = DAG.getStridedLoadVP(
18522 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
18523 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
18524
18525 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
18526
18527 return Ret;
18528}
18529
18530static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
18531 const RISCVSubtarget &Subtarget) {
18532 // Fold:
18533 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
18534 // -1, MASK)
18535 auto *VPStore = cast<VPStoreSDNode>(N);
18536
18537 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
18538 return SDValue();
18539
18540 SDValue VPReverse = VPStore->getValue();
18541 EVT ReverseVT = VPReverse->getValueType(0);
18542
18543 // We do not have a strided_store version for masks, and the evl of vp.reverse
18544 // and vp.store should always be the same.
18545 if (!ReverseVT.getVectorElementType().isByteSized() ||
18546 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
18547 !VPReverse.hasOneUse())
18548 return SDValue();
18549
18550 SDValue StoreMask = VPStore->getMask();
18551 // If Mask is all ones, then the store is unmasked and can be reversed.
18552 if (!isOneOrOneSplat(StoreMask)) {
18553 // If the mask is not all ones, we can reverse the store if the mask was
18554 // also reversed by an unmasked vp.reverse with the same EVL.
18555 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18556 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
18557 StoreMask.getOperand(2) != VPStore->getVectorLength())
18558 return SDValue();
18559 StoreMask = StoreMask.getOperand(0);
18560 }
18561
18562 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
18563 SDLoc DL(N);
18564 MVT XLenVT = Subtarget.getXLenVT();
18565 SDValue NumElem = VPStore->getVectorLength();
18566 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
18567
18568 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18569 DAG.getConstant(1, DL, XLenVT));
18570 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18571 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18572 SDValue Base =
18573 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
18574 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18575
18576 MachineFunction &MF = DAG.getMachineFunction();
18577 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
18578 MachineMemOperand *MMO = MF.getMachineMemOperand(
18579 PtrInfo, VPStore->getMemOperand()->getFlags(),
18580 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
18581
18582 return DAG.getStridedStoreVP(
18583 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
18584 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
18585 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
18586 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
18587}
18588
18589// Peephole avgceil pattern.
18590// %1 = zext <N x i8> %a to <N x i32>
18591// %2 = zext <N x i8> %b to <N x i32>
18592// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
18593// %4 = add nuw nsw <N x i32> %3, %2
18594// %5 = lshr <N x i32> %4, splat (i32 1)
18595// %6 = trunc <N x i32> %5 to <N x i8>
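// Illustrative note: when the pattern matches, the computation is the
// unsigned rounding average ((a + b + 1) >> 1). It is rebuilt below as
// AVGCEILU_VL, which is intended to select to a single vaaddu.vv executed
// with vxrm set to rnu (round-to-nearest-up).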
18596static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG,
18597 const RISCVSubtarget &Subtarget) {
18598 EVT VT = N->getValueType(0);
18599
18600 // Ignore fixed vectors.
18601 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18602 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
18603 return SDValue();
18604
18605 SDValue In = N->getOperand(0);
18606 SDValue Mask = N->getOperand(1);
18607 SDValue VL = N->getOperand(2);
18608
18609 // Input should be a vp_srl with same mask and VL.
18610 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
18611 In.getOperand(3) != VL)
18612 return SDValue();
18613
18614 // Shift amount should be 1.
18615 if (!isOneOrOneSplat(In.getOperand(1)))
18616 return SDValue();
18617
18618 // Shifted value should be a vp_add with same mask and VL.
18619 SDValue LHS = In.getOperand(0);
18620 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
18621 LHS.getOperand(3) != VL)
18622 return SDValue();
18623
18624 SDValue Operands[3];
18625
18626 // Matches another VP_ADD with same VL and Mask.
18627 auto FindAdd = [&](SDValue V, SDValue Other) {
18628 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
18629 V.getOperand(3) != VL)
18630 return false;
18631
18632 Operands[0] = Other;
18633 Operands[1] = V.getOperand(1);
18634 Operands[2] = V.getOperand(0);
18635 return true;
18636 };
18637
18638 // We need to find another VP_ADD in one of the operands.
18639 SDValue LHS0 = LHS.getOperand(0);
18640 SDValue LHS1 = LHS.getOperand(1);
18641 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
18642 return SDValue();
18643
18644 // Now we have three operands of two additions. Check that one of them is a
18645 // constant vector with ones.
18646 auto I = llvm::find_if(Operands,
18647 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
18648 if (I == std::end(Operands))
18649 return SDValue();
18650 // We found a vector with ones, move it to the end of the Operands array.
18651 std::swap(*I, Operands[2]);
18652
18653 // Make sure the other 2 operands can be promoted from the result type.
18654 for (SDValue Op : drop_end(Operands)) {
18655 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
18656 Op.getOperand(2) != VL)
18657 return SDValue();
18658 // Input must be the same size or smaller than our result.
18659 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
18660 return SDValue();
18661 }
18662
18663 // Pattern is detected.
18664 // Rebuild the zero extends in case the inputs are smaller than our result.
18665 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
18666 Operands[0].getOperand(0), Mask, VL);
18667 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
18668 Operands[1].getOperand(0), Mask, VL);
18669 // Build an AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
18670 // mode.
18671 SDLoc DL(N);
18672 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
18673 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
18674}
18675
18676// Convert from one FMA opcode to another based on whether we are negating the
18677// multiply result and/or the accumulator.
18678// NOTE: Only supports RVV operations with VL.
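// Worked example (illustrative): VFMADD_VL computes (a * b) + c.
//   NegMul only:        -(a * b) + c  -> VFNMSUB_VL
//   NegAcc only:         (a * b) - c  -> VFMSUB_VL
//   NegMul and NegAcc:  -(a * b) - c  -> VFNMADD_VL
// The strict-fp opcodes follow the same table.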
18679static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
18680 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
18681 if (NegMul) {
18682 // clang-format off
18683 switch (Opcode) {
18684 default: llvm_unreachable("Unexpected opcode");
18685 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18686 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18687 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18688 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18689 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18690 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18691 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18692 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18693 }
18694 // clang-format on
18695 }
18696
18697 // Negating the accumulator changes ADD<->SUB.
18698 if (NegAcc) {
18699 // clang-format off
18700 switch (Opcode) {
18701 default: llvm_unreachable("Unexpected opcode");
18702 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
18703 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
18704 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
18705 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
18706 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
18707 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
18708 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
18709 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
18710 }
18711 // clang-format on
18712 }
18713
18714 return Opcode;
18715}
18716
18717static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
18718 // Fold FNEG_VL into FMA opcodes.
18719 // The first operand of strict-fp is chain.
18720 bool IsStrict =
18721 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
18722 unsigned Offset = IsStrict ? 1 : 0;
18723 SDValue A = N->getOperand(0 + Offset);
18724 SDValue B = N->getOperand(1 + Offset);
18725 SDValue C = N->getOperand(2 + Offset);
18726 SDValue Mask = N->getOperand(3 + Offset);
18727 SDValue VL = N->getOperand(4 + Offset);
18728
18729 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
18730 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
18731 V.getOperand(2) == VL) {
18732 // Return the negated input.
18733 V = V.getOperand(0);
18734 return true;
18735 }
18736
18737 return false;
18738 };
18739
18740 bool NegA = invertIfNegative(A);
18741 bool NegB = invertIfNegative(B);
18742 bool NegC = invertIfNegative(C);
18743
18744 // If no operands are negated, we're done.
18745 if (!NegA && !NegB && !NegC)
18746 return SDValue();
18747
18748 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
18749 if (IsStrict)
18750 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
18751 {N->getOperand(0), A, B, C, Mask, VL});
18752 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
18753 VL);
18754}
18755
18756static SDValue performVFMADD_VLCombine(SDNode *N,
18757 TargetLowering::DAGCombinerInfo &DCI,
18758 const RISCVSubtarget &Subtarget) {
18759 SelectionDAG &DAG = DCI.DAG;
18760
18761 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
18762 return V;
18763
18764 // FIXME: Ignore strict opcodes for now.
18765 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
18766 return SDValue();
18767
18768 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18769}
18770
18771static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
18772 const RISCVSubtarget &Subtarget) {
18773 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
18774
18775 EVT VT = N->getValueType(0);
18776
18777 if (VT != Subtarget.getXLenVT())
18778 return SDValue();
18779
18780 if (!isa<ConstantSDNode>(N->getOperand(1)))
18781 return SDValue();
18782 uint64_t ShAmt = N->getConstantOperandVal(1);
18783
18784 SDValue N0 = N->getOperand(0);
18785
18786 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
18787 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
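// Numeric example (illustrative): on RV64 with iX = i32, C1 = 2, C2 = 3:
//   (sra (sext_inreg (shl X, 2), i32), 3)
//     -> (sra (shl X, 2 + 32), 3 + 32)
// i.e. "slli rd, rs, 34; srai rd, rd, 35", avoiding a separate sign extend.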
18788 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
18789 unsigned ExtSize =
18790 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
18791 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
18792 N0.getOperand(0).hasOneUse() &&
18793 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
18794 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
18795 if (LShAmt < ExtSize) {
18796 unsigned Size = VT.getSizeInBits();
18797 SDLoc ShlDL(N0.getOperand(0));
18798 SDValue Shl =
18799 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
18800 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
18801 SDLoc DL(N);
18802 return DAG.getNode(ISD::SRA, DL, VT, Shl,
18803 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
18804 }
18805 }
18806 }
18807
18808 if (ShAmt > 32 || VT != MVT::i64)
18809 return SDValue();
18810
18811 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
18812 // FIXME: Should this be a generic combine? There's a similar combine on X86.
18813 //
18814 // Also try these folds where an add or sub is in the middle.
18815 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), i32), C)
18816 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), i32), C)
18817 SDValue Shl;
18818 ConstantSDNode *AddC = nullptr;
18819
18820 // We might have an ADD or SUB between the SRA and SHL.
18821 bool IsAdd = N0.getOpcode() == ISD::ADD;
18822 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
18823 // Other operand needs to be a constant we can modify.
18824 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
18825 if (!AddC)
18826 return SDValue();
18827
18828 // AddC needs to have at least 32 trailing zeros.
18829 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
18830 return SDValue();
18831
18832 // All users should be a shift by constant less than or equal to 32. This
18833 // ensures we'll do this optimization for each of them to produce an
18834 // add/sub+sext_inreg they can all share.
18835 for (SDNode *U : N0->users()) {
18836 if (U->getOpcode() != ISD::SRA ||
18837 !isa<ConstantSDNode>(U->getOperand(1)) ||
18838 U->getConstantOperandVal(1) > 32)
18839 return SDValue();
18840 }
18841
18842 Shl = N0.getOperand(IsAdd ? 0 : 1);
18843 } else {
18844 // Not an ADD or SUB.
18845 Shl = N0;
18846 }
18847
18848 // Look for a shift left by 32.
18849 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
18850 Shl.getConstantOperandVal(1) != 32)
18851 return SDValue();
18852
18853 // If we didn't look through an add/sub, then the shl should have one use.
18854 // If we did look through an add/sub, the sext_inreg we create is free so
18855 // we're only creating 2 new instructions. It's enough to only remove the
18856 // original sra+add/sub.
18857 if (!AddC && !Shl.hasOneUse())
18858 return SDValue();
18859
18860 SDLoc DL(N);
18861 SDValue In = Shl.getOperand(0);
18862
18863 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
18864 // constant.
18865 if (AddC) {
18866 SDValue ShiftedAddC =
18867 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
18868 if (IsAdd)
18869 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
18870 else
18871 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
18872 }
18873
18874 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
18875 DAG.getValueType(MVT::i32));
18876 if (ShAmt == 32)
18877 return SExt;
18878
18879 return DAG.getNode(
18880 ISD::SHL, DL, MVT::i64, SExt,
18881 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
18882}
18883
18884// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
18885// the result is used as the condition of a br_cc or select_cc we can invert,
18886// inverting the setcc is free, and Z is 0/1. Caller will invert the
18887// br_cc/select_cc.
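// Example (illustrative): if a br_cc branches on
//   (and (setcc X, Y, eq), (xor Z, 1))   with Z known to be 0 or 1,
// this returns (or (setcc X, Y, ne), Z); the caller then inverts the
// br_cc condition, which is De Morgan's law applied to the condition.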
18888static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
18889 bool IsAnd = Cond.getOpcode() == ISD::AND;
18890 if (!IsAnd && Cond.getOpcode() != ISD::OR)
18891 return SDValue();
18892
18893 if (!Cond.hasOneUse())
18894 return SDValue();
18895
18896 SDValue Setcc = Cond.getOperand(0);
18897 SDValue Xor = Cond.getOperand(1);
18898 // Canonicalize setcc to LHS.
18899 if (Setcc.getOpcode() != ISD::SETCC)
18900 std::swap(Setcc, Xor);
18901 // LHS should be a setcc and RHS should be an xor.
18902 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
18903 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
18904 return SDValue();
18905
18906 // If the condition is an And, SimplifyDemandedBits may have changed
18907 // (xor Z, 1) to (not Z).
18908 SDValue Xor1 = Xor.getOperand(1);
18909 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
18910 return SDValue();
18911
18912 EVT VT = Cond.getValueType();
18913 SDValue Xor0 = Xor.getOperand(0);
18914
18915 // The LHS of the xor needs to be 0/1.
18916 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
18917 if (!DAG.MaskedValueIsZero(Xor0, Mask))
18918 return SDValue();
18919
18920 // We can only invert integer setccs.
18921 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
18922 if (!SetCCOpVT.isScalarInteger())
18923 return SDValue();
18924
18925 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
18926 if (ISD::isIntEqualitySetCC(CCVal)) {
18927 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
18928 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
18929 Setcc.getOperand(1), CCVal);
18930 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
18931 // Invert (setlt 0, X) by converting to (setlt X, 1).
18932 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
18933 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
18934 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
18935 // Invert (setlt X, 1) by converting to (setlt 0, X).
18936 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
18937 DAG.getConstant(0, SDLoc(Setcc), VT),
18938 Setcc.getOperand(0), CCVal);
18939 } else
18940 return SDValue();
18941
18942 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
18943 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
18944}
18945
18946// Perform common combines for BR_CC and SELECT_CC conditions.
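// Example (illustrative): (br_cc (sra X, 7), 0, setlt) tests only the sign
// bit, which the arithmetic shift does not change, so it is simplified here
// to (br_cc X, 0, setlt) and the srai can be dropped.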
18947static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
18948 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
18949 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
18950
18951 // Since an arithmetic right shift always preserves the sign bit,
18952 // the shift can be omitted.
18953 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
18954 // setge (sra X, N), 0 -> setge X, 0
18955 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
18956 LHS.getOpcode() == ISD::SRA) {
18957 LHS = LHS.getOperand(0);
18958 return true;
18959 }
18960
18961 if (!ISD::isIntEqualitySetCC(CCVal))
18962 return false;
18963
18964 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
18965 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
18966 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
18967 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
18968 // If we're looking for eq 0 instead of ne 0, we need to invert the
18969 // condition.
18970 bool Invert = CCVal == ISD::SETEQ;
18971 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
18972 if (Invert)
18973 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
18974
18975 RHS = LHS.getOperand(1);
18976 LHS = LHS.getOperand(0);
18977 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
18978
18979 CC = DAG.getCondCode(CCVal);
18980 return true;
18981 }
18982
18983 // If XOR is reused and has an immediate that will fit in XORI,
18984 // do not fold.
18985 auto isXorImmediate = [](const SDValue &Op) -> bool {
18986 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
18987 return isInt<12>(XorCnst->getSExtValue());
18988 return false;
18989 };
18990 // Fold (X(i1) ^ 1) == 0 -> X != 0
18991 auto singleBitOp = [&DAG](const SDValue &VarOp,
18992 const SDValue &ConstOp) -> bool {
18993 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
18994 const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
18995 return (XorCnst->getSExtValue() == 1) &&
18996 DAG.MaskedValueIsZero(VarOp, Mask);
18997 }
18998 return false;
18999 };
19000 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
19001 for (const SDNode *UserNode : Op->users()) {
19002 const unsigned Opcode = UserNode->getOpcode();
19003 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
19004 return false;
19005 }
19006 return true;
19007 };
19008 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
19009 const SDValue &LHS, const SDValue &RHS) -> bool {
19010 return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
19011 (!isXorImmediate(LHS.getOperand(1)) ||
19012 singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
19013 onlyUsedBySelectOrBR(LHS));
19014 };
19015 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
19016 if (isFoldableXorEq(LHS, RHS)) {
19017 RHS = LHS.getOperand(1);
19018 LHS = LHS.getOperand(0);
19019 return true;
19020 }
19021 // Fold ((sext (xor X, C)), 0, eq/ne) -> ((sext X), C, eq/ne)
19022 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
19023 const SDValue LHS0 = LHS.getOperand(0);
19024 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
19025 // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y))
19026 RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
19027 LHS0.getOperand(1), LHS.getOperand(1));
19028 LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
19029 LHS0.getOperand(0), LHS.getOperand(1));
19030 return true;
19031 }
19032 }
19033
19034 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
19035 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
19036 LHS.getOperand(1).getOpcode() == ISD::Constant) {
19037 SDValue LHS0 = LHS.getOperand(0);
19038 if (LHS0.getOpcode() == ISD::AND &&
19039 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
19040 uint64_t Mask = LHS0.getConstantOperandVal(1);
19041 uint64_t ShAmt = LHS.getConstantOperandVal(1);
19042 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
19043 // XAndesPerf supports branch on test bit.
19044 if (Subtarget.hasVendorXAndesPerf()) {
19045 LHS =
19046 DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
19047 DAG.getConstant(Mask, DL, LHS.getValueType()));
19048 return true;
19049 }
19050
19051 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
19052 CC = DAG.getCondCode(CCVal);
19053
19054 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
19055 LHS = LHS0.getOperand(0);
19056 if (ShAmt != 0)
19057 LHS =
19058 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
19059 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
19060 return true;
19061 }
19062 }
19063 }
19064
19065 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
19066 // This can occur when legalizing some floating point comparisons.
19067 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
19068 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
19069 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
19070 CC = DAG.getCondCode(CCVal);
19071 RHS = DAG.getConstant(0, DL, LHS.getValueType());
19072 return true;
19073 }
19074
19075 if (isNullConstant(RHS)) {
19076 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
19077 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
19078 CC = DAG.getCondCode(CCVal);
19079 LHS = NewCond;
19080 return true;
19081 }
19082 }
19083
19084 return false;
19085}
19086
19087// Fold
19088// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
19089// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
19090// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
19091// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
19092// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
19093// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
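// Example (illustrative): (select C, (add Y, X), Y) becomes
// (add Y, (select C, X, 0)); with Zicond the inner select maps to a single
// czero.eqz, so the whole thing can lower to "czero.eqz t, X, C; add Y, Y, t"
// without a branch.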
19094static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
19095 SDValue TrueVal, SDValue FalseVal,
19096 bool Swapped) {
19097 bool Commutative = true;
19098 unsigned Opc = TrueVal.getOpcode();
19099 switch (Opc) {
19100 default:
19101 return SDValue();
19102 case ISD::SHL:
19103 case ISD::SRA:
19104 case ISD::SRL:
19105 case ISD::SUB:
19106 case ISD::ROTL:
19107 case ISD::ROTR:
19108 Commutative = false;
19109 break;
19110 case ISD::ADD:
19111 case ISD::OR:
19112 case ISD::XOR:
19113 case ISD::UMIN:
19114 case ISD::UMAX:
19115 break;
19116 }
19117
19118 if (!TrueVal.hasOneUse())
19119 return SDValue();
19120
19121 unsigned OpToFold;
19122 if (FalseVal == TrueVal.getOperand(0))
19123 OpToFold = 0;
19124 else if (Commutative && FalseVal == TrueVal.getOperand(1))
19125 OpToFold = 1;
19126 else
19127 return SDValue();
19128
19129 EVT VT = N->getValueType(0);
19130 SDLoc DL(N);
19131 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
19132 EVT OtherOpVT = OtherOp.getValueType();
19133 SDValue IdentityOperand =
19134 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
19135 if (!Commutative)
19136 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
19137 assert(IdentityOperand && "No identity operand!");
19138
19139 if (Swapped)
19140 std::swap(OtherOp, IdentityOperand);
19141 SDValue NewSel =
19142 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
19143 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
19144}
19145
19146// This tries to get rid of `select` and `icmp` that are being used to handle
19147// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
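// Example (illustrative): for i32 X,
//   (select (X == 0), 0, (cttz_zero_undef X))
// becomes (and (cttz X), 31): cttz returns 32 for a zero input and
// 32 & 31 == 0, so the explicit compare and select are no longer needed.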
19148static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
19149 SDValue Cond = N->getOperand(0);
19150
19151 // This represents either CTTZ or CTLZ instruction.
19152 SDValue CountZeroes;
19153
19154 SDValue ValOnZero;
19155
19156 if (Cond.getOpcode() != ISD::SETCC)
19157 return SDValue();
19158
19159 if (!isNullConstant(Cond->getOperand(1)))
19160 return SDValue();
19161
19162 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
19163 if (CCVal == ISD::CondCode::SETEQ) {
19164 CountZeroes = N->getOperand(2);
19165 ValOnZero = N->getOperand(1);
19166 } else if (CCVal == ISD::CondCode::SETNE) {
19167 CountZeroes = N->getOperand(1);
19168 ValOnZero = N->getOperand(2);
19169 } else {
19170 return SDValue();
19171 }
19172
19173 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
19174 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
19175 CountZeroes = CountZeroes.getOperand(0);
19176
19177 if (CountZeroes.getOpcode() != ISD::CTTZ &&
19178 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
19179 CountZeroes.getOpcode() != ISD::CTLZ &&
19180 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
19181 return SDValue();
19182
19183 if (!isNullConstant(ValOnZero))
19184 return SDValue();
19185
19186 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
19187 if (Cond->getOperand(0) != CountZeroesArgument)
19188 return SDValue();
19189
19190 unsigned BitWidth = CountZeroes.getValueSizeInBits();
19191 if (!isPowerOf2_32(BitWidth))
19192 return SDValue();
19193
19194 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
19195 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
19196 CountZeroes.getValueType(), CountZeroesArgument);
19197 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
19198 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
19199 CountZeroes.getValueType(), CountZeroesArgument);
19200 }
19201
19202 SDValue BitWidthMinusOne =
19203 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
19204
19205 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
19206 CountZeroes, BitWidthMinusOne);
19207 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
19208}
19209
19210static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
19211 const RISCVSubtarget &Subtarget) {
19212 SDValue Cond = N->getOperand(0);
19213 SDValue True = N->getOperand(1);
19214 SDValue False = N->getOperand(2);
19215 SDLoc DL(N);
19216 EVT VT = N->getValueType(0);
19217 EVT CondVT = Cond.getValueType();
19218
19219 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
19220 return SDValue();
19221
19222 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
19223 // BEXTI, where C is power of 2.
19224 if (Subtarget.hasBEXTILike() && VT.isScalarInteger() &&
19225 (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
19226 SDValue LHS = Cond.getOperand(0);
19227 SDValue RHS = Cond.getOperand(1);
19228 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
19229 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
19230 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
19231 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
19232 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
19233 return DAG.getSelect(DL, VT,
19234 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
19235 False, True);
19236 }
19237 }
19238 return SDValue();
19239}
19240
19241static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
19242 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
19243 return false;
19244
19245 SwapCC = false;
19246 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
19247 std::swap(TrueVal, FalseVal);
19248 SwapCC = true;
19249 }
19250
19251 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
19252 return false;
19253
19254 SDValue A = FalseVal.getOperand(0);
19255 SDValue B = FalseVal.getOperand(1);
19256 // Add is commutative, so check both orders
19257 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
19258 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
19259}
19260
19261/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
19262 /// This allows us to match a vadd.vv fed by a masked vrsub, which reduces
19263/// register pressure over the add followed by masked vsub sequence.
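// Sketch (illustrative): for
//   (vselect m, (add a, b), (sub a, b))
// the result is rebuilt as (add a, (vselect m', -b, b)), where m' is m or
// its logical negation depending on which arm held the sub; the inner
// vselect of b/-b is then matched as a masked vrsub.vi of b, leaving an
// unmasked vadd.vv.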
19264static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
19265 SDLoc DL(N);
19266 EVT VT = N->getValueType(0);
19267 SDValue CC = N->getOperand(0);
19268 SDValue TrueVal = N->getOperand(1);
19269 SDValue FalseVal = N->getOperand(2);
19270
19271 bool SwapCC;
19272 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
19273 return SDValue();
19274
19275 SDValue Sub = SwapCC ? TrueVal : FalseVal;
19276 SDValue A = Sub.getOperand(0);
19277 SDValue B = Sub.getOperand(1);
19278
19279 // Arrange the select such that we can match a masked
19280 // vrsub.vi to perform the conditional negate
19281 SDValue NegB = DAG.getNegative(B, DL, VT);
19282 if (!SwapCC)
19283 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19284 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19285 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19286}
19287
19288static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
19289 const RISCVSubtarget &Subtarget) {
19290 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
19291 return Folded;
19292
19293 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
19294 return V;
19295
19296 if (Subtarget.hasConditionalMoveFusion())
19297 return SDValue();
19298
19299 SDValue TrueVal = N->getOperand(1);
19300 SDValue FalseVal = N->getOperand(2);
19301 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
19302 return V;
19303 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
19304}
19305
19306/// If we have a build_vector where each lane is binop X, C, where C
19307/// is a constant (but not necessarily the same constant on all lanes),
19308/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
19309/// We assume that materializing a constant build vector will be no more
19310 /// expensive than performing O(n) binops.
19311static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
19312 const RISCVSubtarget &Subtarget,
19313 const RISCVTargetLowering &TLI) {
19314 SDLoc DL(N);
19315 EVT VT = N->getValueType(0);
19316
19317 assert(!VT.isScalableVector() && "unexpected build vector");
19318
19319 if (VT.getVectorNumElements() == 1)
19320 return SDValue();
19321
19322 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
19323 if (!TLI.isBinOp(Opcode))
19324 return SDValue();
19325
19326 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
19327 return SDValue();
19328
19329 // This BUILD_VECTOR involves an implicit truncation, and sinking
19330 // truncates through binops is non-trivial.
19331 if (N->op_begin()->getValueType() != VT.getVectorElementType())
19332 return SDValue();
19333
19334 SmallVector<SDValue> LHSOps;
19335 SmallVector<SDValue> RHSOps;
19336 for (SDValue Op : N->ops()) {
19337 if (Op.isUndef()) {
19338 // We can't form a divide or remainder from undef.
19339 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
19340 return SDValue();
19341
19342 LHSOps.push_back(Op);
19343 RHSOps.push_back(Op);
19344 continue;
19345 }
19346
19347 // TODO: We can handle operations which have a neutral rhs value
19348 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
19349 // of profit in a more explicit manner.
19350 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
19351 return SDValue();
19352
19353 LHSOps.push_back(Op.getOperand(0));
19354 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
19355 !isa<ConstantFPSDNode>(Op.getOperand(1)))
19356 return SDValue();
19357 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19358 // have different LHS and RHS types.
19359 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
19360 return SDValue();
19361
19362 RHSOps.push_back(Op.getOperand(1));
19363 }
19364
19365 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
19366 DAG.getBuildVector(VT, DL, RHSOps));
19367}
19368
19369static MVT getQDOTXResultType(MVT OpVT) {
19370 ElementCount OpEC = OpVT.getVectorElementCount();
19371 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
19372 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
19373}
19374
19375/// Given fixed length vectors A and B with equal element types, but possibly
19376/// different number of elements, return A + B where either A or B is zero
19377/// padded to the larger number of elements.
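// Example (illustrative): if A is v4i32 and B is v8i32, the code below
// extracts the low v4i32 of B, adds it to A, and inserts the sum back into
// B at index 0; the upper half of B passes through unchanged, which is the
// same result as zero-padding A to v8i32 and adding.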
19378static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,
19379 SelectionDAG &DAG) {
19380 // NOTE: Manually doing the extract/add/insert scheme produces
19381 // significantly better codegen than the naive pad with zeros
19382 // and add scheme.
19383 EVT AVT = A.getValueType();
19384 EVT BVT = B.getValueType();
19385 assert(AVT.getVectorElementType() == BVT.getVectorElementType());
19386 if (AVT.getVectorMinNumElements() > BVT.getVectorMinNumElements()) {
19387 std::swap(A, B);
19388 std::swap(AVT, BVT);
19389 }
19390
19391 SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
19392 SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
19393 return DAG.getInsertSubvector(DL, B, Res, 0);
19394}
19395
19396static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
19397 SelectionDAG &DAG,
19398 const RISCVSubtarget &Subtarget,
19399 const RISCVTargetLowering &TLI) {
19400 using namespace SDPatternMatch;
19401 // Note: We intentionally do not check the legality of the reduction type.
19402 // We want to handle the m4/m8 *src* types, and thus need to let illegal
19403 // intermediate types flow through here.
19404 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
19405 !InVec.getValueType().getVectorElementCount().isKnownMultipleOf(4))
19406 return SDValue();
19407
19408 // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
19409 // form).
19410 SDValue A, B;
19411 if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
19412 SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
19413 SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
19414 if (AOpt || BOpt) {
19415 if (AOpt)
19416 A = AOpt;
19417 if (BOpt)
19418 B = BOpt;
19419 // From here, we're doing A + B with mixed types, implicitly zero
19420 // padded to the wider type. Note that we *don't* need the result
19421 // type to be the original VT, and in fact prefer narrower ones
19422 // if possible.
19423 return getZeroPaddedAdd(DL, A, B, DAG);
19424 }
19425 }
19426
19427 // zext a <--> partial_reduce_umla 0, a, 1
19428 // sext a <--> partial_reduce_smla 0, a, 1
19429 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
19430 InVec.getOpcode() == ISD::SIGN_EXTEND) {
19431 SDValue A = InVec.getOperand(0);
19432 EVT OpVT = A.getValueType();
19433 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
19434 return SDValue();
19435
19436 MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
19437 SDValue B = DAG.getConstant(0x1, DL, OpVT);
19438 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
19439 unsigned Opc =
19440 IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
19441 return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
19442 }
19443
19444 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
19445 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
19446 // mul (sext a, zext b) -> partial_reduce_sumla 0, a, b
19447 // mul (zext a, sext b) -> partial_reduce_sumla 0, b, a (swapped)
19448 if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
19449 return SDValue();
19450
19451 if (!ISD::isExtOpcode(A.getOpcode()))
19452 return SDValue();
19453
19454 EVT OpVT = A.getOperand(0).getValueType();
19455 if (OpVT.getVectorElementType() != MVT::i8 ||
19456 OpVT != B.getOperand(0).getValueType() ||
19457 !TLI.isTypeLegal(A.getValueType()))
19458 return SDValue();
19459
19460 unsigned Opc;
19461 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
19462 Opc = ISD::PARTIAL_REDUCE_SMLA;
19463 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19464 B.getOpcode() == ISD::ZERO_EXTEND)
19465 Opc = ISD::PARTIAL_REDUCE_UMLA;
19466 else if (A.getOpcode() == ISD::SIGN_EXTEND &&
19467 B.getOpcode() == ISD::ZERO_EXTEND)
19468 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19469 else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19470 B.getOpcode() == ISD::SIGN_EXTEND) {
19471 Opc = ISD::PARTIAL_REDUCE_SUMLA;
19472 std::swap(A, B);
19473 } else
19474 return SDValue();
19475
19476 MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
19477 return DAG.getNode(
19478 Opc, DL, ResVT,
19479 {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
19480}
19481
19482static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG,
19483 const RISCVSubtarget &Subtarget,
19484 const RISCVTargetLowering &TLI) {
19485 if (!Subtarget.hasStdExtZvqdotq())
19486 return SDValue();
19487
19488 SDLoc DL(N);
19489 EVT VT = N->getValueType(0);
19490 SDValue InVec = N->getOperand(0);
19491 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
19492 return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
19493 return SDValue();
19494}
19495
19496static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
19497 const RISCVSubtarget &Subtarget,
19498 const RISCVTargetLowering &TLI) {
19499 SDValue InVec = N->getOperand(0);
19500 SDValue InVal = N->getOperand(1);
19501 SDValue EltNo = N->getOperand(2);
19502 SDLoc DL(N);
19503
19504 EVT VT = InVec.getValueType();
19505 if (VT.isScalableVector())
19506 return SDValue();
19507
19508 if (!InVec.hasOneUse())
19509 return SDValue();
19510
19511 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
19512 // move the insert_vector_elts into the arms of the binop. Note that
19513 // the new RHS must be a constant.
19514 const unsigned InVecOpcode = InVec->getOpcode();
19515 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
19516 InVal.hasOneUse()) {
19517 SDValue InVecLHS = InVec->getOperand(0);
19518 SDValue InVecRHS = InVec->getOperand(1);
19519 SDValue InValLHS = InVal->getOperand(0);
19520 SDValue InValRHS = InVal->getOperand(1);
19521
19522 if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
19523 return SDValue();
19524 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
19525 return SDValue();
19526 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19527 // have different LHS and RHS types.
19528 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
19529 return SDValue();
19530 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19531 InVecLHS, InValLHS, EltNo);
19532 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19533 InVecRHS, InValRHS, EltNo);
19534 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
19535 }
19536
19537 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
19538 // move the insert_vector_elt to the source operand of the concat_vector.
19539 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
19540 return SDValue();
19541
19542 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19543 if (!IndexC)
19544 return SDValue();
19545 unsigned Elt = IndexC->getZExtValue();
19546
19547 EVT ConcatVT = InVec.getOperand(0).getValueType();
19548 if (ConcatVT.getVectorElementType() != InVal.getValueType())
19549 return SDValue();
19550 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19551 unsigned NewIdx = Elt % ConcatNumElts;
19552
19553 unsigned ConcatOpIdx = Elt / ConcatNumElts;
19554 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
19555 ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);
19556
19557 SmallVector<SDValue> ConcatOps(InVec->ops());
19558 ConcatOps[ConcatOpIdx] = ConcatOp;
19559 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19560}
19561
19562// If we're concatenating a series of vector loads like
19563// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
19564// Then we can turn this into a strided load by widening the vector elements
19565// vlse32 p, stride=n
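// Example (illustrative): four v4i8 loads from p+0, p+8, p+16 and p+24 share
// a common stride of 8 bytes; each v4i8 is reinterpreted as an i32 lane, so
// the whole concat can become one "vlse32.v vd, (p), t0" with t0 = 8, later
// bitcast back to the original vector type.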
19566static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
19567 const RISCVSubtarget &Subtarget,
19568 const RISCVTargetLowering &TLI) {
19569 SDLoc DL(N);
19570 EVT VT = N->getValueType(0);
19571
19572 // Only perform this combine on legal MVTs.
19573 if (!TLI.isTypeLegal(VT))
19574 return SDValue();
19575
19576 // TODO: Potentially extend this to scalable vectors
19577 if (VT.isScalableVector())
19578 return SDValue();
19579
19580 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
19581 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
19582 !SDValue(BaseLd, 0).hasOneUse())
19583 return SDValue();
19584
19585 EVT BaseLdVT = BaseLd->getValueType(0);
19586
19587 // Go through the loads and check that they're strided
19588 SmallVector<LoadSDNode *> Lds;
19589 Lds.push_back(BaseLd);
19590 Align Align = BaseLd->getAlign();
19591 for (SDValue Op : N->ops().drop_front()) {
19592 auto *Ld = dyn_cast<LoadSDNode>(Op);
19593 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
19594 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
19595 Ld->getValueType(0) != BaseLdVT)
19596 return SDValue();
19597
19598 Lds.push_back(Ld);
19599
19600 // The common alignment is the most restrictive (smallest) of all the loads
19601 Align = std::min(Align, Ld->getAlign());
19602 }
19603
19604 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
19605 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
19606 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
19607 // If the load ptrs can be decomposed into a common (Base + Index) with a
19608 // common constant stride, then return the constant stride.
19609 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
19610 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
19611 if (BIO1.equalBaseIndex(BIO2, DAG))
19612 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
19613
19614 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
19615 SDValue P1 = Ld1->getBasePtr();
19616 SDValue P2 = Ld2->getBasePtr();
19617 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
19618 return {{P2.getOperand(1), false}};
19619 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
19620 return {{P1.getOperand(1), true}};
19621
19622 return std::nullopt;
19623 };
19624
19625 // Get the distance between the first and second loads
19626 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
19627 if (!BaseDiff)
19628 return SDValue();
19629
19630 // Check all the loads are the same distance apart
19631 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
19632 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
19633 return SDValue();
19634
19635 // TODO: At this point, we've successfully matched a generalized gather
19636 // load. Maybe we should emit that, and then move the specialized
19637 // matchers above and below into a DAG combine?
19638
19639 // Get the widened scalar type, e.g. v4i8 -> i64
19640 unsigned WideScalarBitWidth =
19641 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
19642 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
19643
19644 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
19645 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
19646 if (!TLI.isTypeLegal(WideVecVT))
19647 return SDValue();
19648
19649 // Check that the operation is legal
19650 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
19651 return SDValue();
19652
19653 auto [StrideVariant, MustNegateStride] = *BaseDiff;
19654 SDValue Stride =
19655 std::holds_alternative<SDValue>(StrideVariant)
19656 ? std::get<SDValue>(StrideVariant)
19657 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
19658 Lds[0]->getOffset().getValueType());
19659 if (MustNegateStride)
19660 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
19661
19662 SDValue AllOneMask =
19663 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
19664 DAG.getConstant(1, DL, MVT::i1));
19665
19666 uint64_t MemSize;
19667 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
19668 ConstStride && ConstStride->getSExtValue() >= 0)
19669 // total size = (elsize * n) + (stride - elsize) * (n-1)
19670 // = elsize + stride * (n-1)
19671 MemSize = WideScalarVT.getSizeInBits() +
19672 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
19673 else
19674 // If Stride isn't constant, then we can't know how much it will load
19675 MemSize = MemoryLocation::UnknownSize;
19676
19677 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
19678 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
19679 Align);
19680
19681 SDValue StridedLoad = DAG.getStridedLoadVP(
19682 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
19683 AllOneMask,
19684 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
19685
19686 for (SDValue Ld : N->ops())
19687 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
19688
19689 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
19690}
19691
19692 static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
19693 const RISCVSubtarget &Subtarget,
19694 const RISCVTargetLowering &TLI) {
19695 SDLoc DL(N);
19696 EVT VT = N->getValueType(0);
19697 const unsigned ElementSize = VT.getScalarSizeInBits();
19698 const unsigned NumElts = VT.getVectorNumElements();
19699 SDValue V1 = N->getOperand(0);
19700 SDValue V2 = N->getOperand(1);
19701 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
19702 MVT XLenVT = Subtarget.getXLenVT();
19703
19704 // Recognize a disguised select of add/sub.
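// For example (illustrative): a select-mask shuffle of V1 = (add A, B) and
// V2 = (sub A, B) is rewritten as (add A, (vselect CC, -B, B)), i.e. B is
// conditionally negated per lane (a masked vrsub.vi) and then added.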
19705 bool SwapCC;
19706 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
19707 matchSelectAddSub(V1, V2, SwapCC)) {
19708 SDValue Sub = SwapCC ? V1 : V2;
19709 SDValue A = Sub.getOperand(0);
19710 SDValue B = Sub.getOperand(1);
19711
19712 SmallVector<SDValue> MaskVals;
19713 for (int MaskIndex : Mask) {
19714 bool SelectMaskVal = (MaskIndex < (int)NumElts);
19715 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
19716 }
19717 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
19718 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
19719 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
19720
19721 // Arrange the select such that we can match a masked
19722 // vrsub.vi to perform the conditional negate
19723 SDValue NegB = DAG.getNegative(B, DL, VT);
19724 if (!SwapCC)
19725 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19726 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19727 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19728 }
19729
19730 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
19731 // during the combine phase before type legalization, and relies on
19732 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
19733 // for the source mask.
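// For example (illustrative): a v4i128 shuffle with mask <1, 0, 3, 2> becomes
// a v8i64 shuffle with mask <2, 3, 0, 1, 6, 7, 4, 5> once each mask element
// is split in two by narrowShuffleMaskElts below.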
19734 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
19735 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
19736 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
19737 return SDValue();
19738
19739 SmallVector<int, 8> NewMask;
19740 narrowShuffleMaskElts(2, Mask, NewMask);
19741
19742 LLVMContext &C = *DAG.getContext();
19743 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
19744 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
19745 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
19746 DAG.getBitcast(NewVT, V2), NewMask);
19747 return DAG.getBitcast(VT, Res);
19748}
19749
19750 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
19751 const RISCVSubtarget &Subtarget) {
19752 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19753
19754 if (N->getValueType(0).isFixedLengthVector())
19755 return SDValue();
19756
19757 SDValue Addend = N->getOperand(0);
19758 SDValue MulOp = N->getOperand(1);
19759
19760 if (N->getOpcode() == RISCVISD::ADD_VL) {
19761 SDValue AddPassthruOp = N->getOperand(2);
19762 if (!AddPassthruOp.isUndef())
19763 return SDValue();
19764 }
19765
19766 auto IsVWMulOpc = [](unsigned Opc) {
19767 switch (Opc) {
19768 case RISCVISD::VWMUL_VL:
19769 case RISCVISD::VWMULU_VL:
19770 case RISCVISD::VWMULSU_VL:
19771 return true;
19772 default:
19773 return false;
19774 }
19775 };
19776
19777 if (!IsVWMulOpc(MulOp.getOpcode()))
19778 std::swap(Addend, MulOp);
19779
19780 if (!IsVWMulOpc(MulOp.getOpcode()))
19781 return SDValue();
19782
19783 SDValue MulPassthruOp = MulOp.getOperand(2);
19784
19785 if (!MulPassthruOp.isUndef())
19786 return SDValue();
19787
19788 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19789 const RISCVSubtarget &Subtarget) {
19790 if (N->getOpcode() == ISD::ADD) {
19791 SDLoc DL(N);
19792 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19793 Subtarget);
19794 }
19795 return std::make_pair(N->getOperand(3), N->getOperand(4));
19796 }(N, DAG, Subtarget);
19797
19798 SDValue MulMask = MulOp.getOperand(3);
19799 SDValue MulVL = MulOp.getOperand(4);
19800
19801 if (AddMask != MulMask || AddVL != MulVL)
19802 return SDValue();
19803
19804 const auto &TSInfo =
19805 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
19806 unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());
19807
19808 SDLoc DL(N);
19809 EVT VT = N->getValueType(0);
19810 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
19811 AddVL};
19812 return DAG.getNode(Opc, DL, VT, Ops);
19813}
19814
19815 static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG,
19816 const RISCVSubtarget &Subtarget) {
19817
19818 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
19819
19820 if (!N->getValueType(0).isVector())
19821 return SDValue();
19822
19823 SDValue Addend = N->getOperand(0);
19824 SDValue DotOp = N->getOperand(1);
19825
19826 if (N->getOpcode() == RISCVISD::ADD_VL) {
19827 SDValue AddPassthruOp = N->getOperand(2);
19828 if (!AddPassthruOp.isUndef())
19829 return SDValue();
19830 }
19831
19832 auto IsVqdotqOpc = [](unsigned Opc) {
19833 switch (Opc) {
19834 case RISCVISD::VQDOT_VL:
19835 case RISCVISD::VQDOTU_VL:
19836 case RISCVISD::VQDOTSU_VL:
19837 return true;
19838 default:
19839 return false;
19840 }
19841 };
19842
19843 if (!IsVqdotqOpc(DotOp.getOpcode()))
19844 std::swap(Addend, DotOp);
19845
19846 if (!IsVqdotqOpc(DotOp.getOpcode()))
19847 return SDValue();
19848
19849 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
19850 const RISCVSubtarget &Subtarget) {
19851 if (N->getOpcode() == ISD::ADD) {
19852 SDLoc DL(N);
19853 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
19854 Subtarget);
19855 }
19856 return std::make_pair(N->getOperand(3), N->getOperand(4));
19857 }(N, DAG, Subtarget);
19858
19859 SDValue MulVL = DotOp.getOperand(4);
19860 if (AddVL != MulVL)
19861 return SDValue();
19862
19863 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
19864 AddMask.getOperand(0) != MulVL)
19865 return SDValue();
19866
19867 SDValue AccumOp = DotOp.getOperand(2);
19868 SDLoc DL(N);
19869 EVT VT = N->getValueType(0);
19870 Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
19871 DAG.getUNDEF(VT), AddMask, AddVL);
19872
19873 SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
19874 DotOp.getOperand(3), DotOp->getOperand(4)};
19875 return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
19876}
19877
19878static bool
19879 legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
19880 ISD::MemIndexType &IndexType,
19881 RISCVTargetLowering::DAGCombinerInfo &DCI) {
19882 if (!DCI.isBeforeLegalize())
19883 return false;
19884
19885 SelectionDAG &DAG = DCI.DAG;
19886 const MVT XLenVT =
19887 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
19888
19889 const EVT IndexVT = Index.getValueType();
19890
19891 // RISC-V indexed loads only support the "unsigned unscaled" addressing
19892 // mode, so anything else must be manually legalized.
19893 if (!isIndexTypeSigned(IndexType))
19894 return false;
19895
19896 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
19897 // Any index legalization should first promote to XLenVT, so we don't lose
19898 // bits when scaling. This may create an illegal index type so we let
19899 // LLVM's legalization take care of the splitting.
19900 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
19901 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
19902 EVT::getVectorVT(*DAG.getContext(), XLenVT,
19903 IndexVT.getVectorElementCount()),
19904 Index);
19905 }
19906 IndexType = ISD::UNSIGNED_SCALED;
19907 return true;
19908}
19909
19910/// Match the index vector of a scatter or gather node as the shuffle mask
19911/// which performs the rearrangement if possible. Will only match if
19912/// all lanes are touched, and thus replacing the scatter or gather with
19913/// a unit strided access and shuffle is legal.
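// For example (illustrative): a gather of v4i32 with byte-offset index
// <4, 0, 12, 8> and an all-ones mask touches every element of a 16-byte
// region, so it can become a unit-strided load followed by a shuffle with
// mask <1, 0, 3, 2>.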
19914static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
19915 SmallVector<int> &ShuffleMask) {
19916 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19917 return false;
19918 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19919 return false;
19920
19921 const unsigned ElementSize = VT.getScalarStoreSize();
19922 const unsigned NumElems = VT.getVectorNumElements();
19923
19924 // Create the shuffle mask and check all bits active
19925 assert(ShuffleMask.empty());
19926 BitVector ActiveLanes(NumElems);
19927 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19928 // TODO: We've found an active bit of UB, and could be
19929 // more aggressive here if desired.
19930 if (Index->getOperand(i)->isUndef())
19931 return false;
19932 uint64_t C = Index->getConstantOperandVal(i);
19933 if (C % ElementSize != 0)
19934 return false;
19935 C = C / ElementSize;
19936 if (C >= NumElems)
19937 return false;
19938 ShuffleMask.push_back(C);
19939 ActiveLanes.set(C);
19940 }
19941 return ActiveLanes.all();
19942}
19943
19944/// Match the index of a gather or scatter operation as an operation
19945/// with twice the element width and half the number of elements. This is
19946/// generally profitable (if legal) because these operations are linear
19947 // in VL, so even if we cause some extra VTYPE/VL toggles, we still
19948/// come out ahead.
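// For example (illustrative): a gather of v8i32 with byte-offset index
// <0, 4, 16, 20, 32, 36, 48, 52> pairs up adjacent elements and can be
// rewritten as a gather of v4i64 with index <0, 16, 32, 48>.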
19949static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
19950 Align BaseAlign, const RISCVSubtarget &ST) {
19951 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
19952 return false;
19953 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
19954 return false;
19955
19956 // Attempt a doubling. If we can use an element type 4x or 8x in
19957 // size, this will happen via multiple iterations of the transform.
19958 const unsigned NumElems = VT.getVectorNumElements();
19959 if (NumElems % 2 != 0)
19960 return false;
19961
19962 const unsigned ElementSize = VT.getScalarStoreSize();
19963 const unsigned WiderElementSize = ElementSize * 2;
19964 if (WiderElementSize > ST.getELen()/8)
19965 return false;
19966
19967 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
19968 return false;
19969
19970 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
19971 // TODO: We've found an active bit of UB, and could be
19972 // more aggressive here if desired.
19973 if (Index->getOperand(i)->isUndef())
19974 return false;
19975 // TODO: This offset check is too strict if we support fully
19976 // misaligned memory operations.
19977 uint64_t C = Index->getConstantOperandVal(i);
19978 if (i % 2 == 0) {
19979 if (C % WiderElementSize != 0)
19980 return false;
19981 continue;
19982 }
19983 uint64_t Last = Index->getConstantOperandVal(i-1);
19984 if (C != Last + ElementSize)
19985 return false;
19986 }
19987 return true;
19988}
19989
19990// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
19991 // This benefits the cases where X and Y are low-precision vectors of the
19992 // same value type. Since the truncate would be lowered into n levels of
19993 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
19994// restriction, such pattern would be expanded into a series of "vsetvli"
19995// and "vnsrl" instructions later to reach this point.
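// For example (an illustrative sketch): with X and Y of type v4i8,
// trunc (sra (sext X to v4i32), (zext Y to v4i32)) becomes
// sra (X, smin (Y, 7)), avoiding both the widening and the narrowing chain.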
19996 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
19997 SDValue Mask = N->getOperand(1);
19998 SDValue VL = N->getOperand(2);
19999
20000 bool IsVLMAX = isAllOnesConstant(VL) ||
20001 (isa<RegisterSDNode>(VL) &&
20002 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
20003 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
20004 Mask.getOperand(0) != VL)
20005 return SDValue();
20006
20007 auto IsTruncNode = [&](SDValue V) {
20008 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
20009 V.getOperand(1) == Mask && V.getOperand(2) == VL;
20010 };
20011
20012 SDValue Op = N->getOperand(0);
20013
20014 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
20015 // to distinguish such pattern.
20016 while (IsTruncNode(Op)) {
20017 if (!Op.hasOneUse())
20018 return SDValue();
20019 Op = Op.getOperand(0);
20020 }
20021
20022 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
20023 return SDValue();
20024
20025 SDValue N0 = Op.getOperand(0);
20026 SDValue N1 = Op.getOperand(1);
20027 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
20028 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
20029 return SDValue();
20030
20031 SDValue N00 = N0.getOperand(0);
20032 SDValue N10 = N1.getOperand(0);
20033 if (!N00.getValueType().isVector() ||
20034 N00.getValueType() != N10.getValueType() ||
20035 N->getValueType(0) != N10.getValueType())
20036 return SDValue();
20037
20038 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
20039 SDValue SMin =
20040 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
20041 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
20042 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
20043}
20044
20045// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
20046// maximum value for the truncated type.
20047// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
20048// is the signed maximum value for the truncated type and C2 is the signed
20049// minimum value.
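// For example (illustrative): when truncating from i16 elements to i8
// elements, C is 255 for the unsigned case, and C1 = 127 / C2 = -128 for the
// signed case.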
20050 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
20051 const RISCVSubtarget &Subtarget) {
20052 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
20053
20054 MVT VT = N->getSimpleValueType(0);
20055
20056 SDValue Mask = N->getOperand(1);
20057 SDValue VL = N->getOperand(2);
20058
20059 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
20060 APInt &SplatVal) {
20061 if (V.getOpcode() != Opc &&
20062 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
20063 V.getOperand(3) == Mask && V.getOperand(4) == VL))
20064 return SDValue();
20065
20066 SDValue Op = V.getOperand(1);
20067
20068 // Peek through conversion between fixed and scalable vectors.
20069 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
20070 isNullConstant(Op.getOperand(2)) &&
20071 Op.getOperand(1).getValueType().isFixedLengthVector() &&
20072 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20073 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
20074 isNullConstant(Op.getOperand(1).getOperand(1)))
20075 Op = Op.getOperand(1).getOperand(0);
20076
20077 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
20078 return V.getOperand(0);
20079
20080 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
20081 Op.getOperand(2) == VL) {
20082 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
20083 SplatVal =
20084 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
20085 return V.getOperand(0);
20086 }
20087 }
20088
20089 return SDValue();
20090 };
20091
20092 SDLoc DL(N);
20093
20094 auto DetectUSatPattern = [&](SDValue V) {
20095 APInt LoC, HiC;
20096
20097 // Simple case, V is a UMIN.
20098 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
20099 if (HiC.isMask(VT.getScalarSizeInBits()))
20100 return UMinOp;
20101
20102 // If we have an SMAX that removes negative numbers first, then we can match
20103 // SMIN instead of UMIN.
20104 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20105 if (SDValue SMaxOp =
20106 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20107 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
20108 return SMinOp;
20109
20110 // If we have an SMIN before an SMAX and the SMAX constant is less than or
20111 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
20112 // first.
20113 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20114 if (SDValue SMinOp =
20115 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20116 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
20117 HiC.uge(LoC))
20118 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
20119 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
20120 Mask, VL);
20121
20122 return SDValue();
20123 };
20124
20125 auto DetectSSatPattern = [&](SDValue V) {
20126 unsigned NumDstBits = VT.getScalarSizeInBits();
20127 unsigned NumSrcBits = V.getScalarValueSizeInBits();
20128 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
20129 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
20130
20131 APInt HiC, LoC;
20132 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20133 if (SDValue SMaxOp =
20134 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20135 if (HiC == SignedMax && LoC == SignedMin)
20136 return SMaxOp;
20137
20138 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20139 if (SDValue SMinOp =
20140 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20141 if (HiC == SignedMax && LoC == SignedMin)
20142 return SMinOp;
20143
20144 return SDValue();
20145 };
20146
20147 SDValue Src = N->getOperand(0);
20148
20149 // Look through multiple layers of truncates.
20150 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
20151 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
20152 Src.hasOneUse())
20153 Src = Src.getOperand(0);
20154
20155 SDValue Val;
20156 unsigned ClipOpc;
20157 if ((Val = DetectUSatPattern(Src)))
20158 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
20159 else if ((Val = DetectSSatPattern(Src)))
20160 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
20161 else
20162 return SDValue();
20163
20164 MVT ValVT = Val.getSimpleValueType();
20165
20166 do {
20167 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
20168 ValVT = ValVT.changeVectorElementType(ValEltVT);
20169 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
20170 } while (ValVT != VT);
20171
20172 return Val;
20173}
20174
20175// Convert
20176// (iX ctpop (bitcast (vXi1 A)))
20177// ->
20178// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
20179// and
20180 // (iN reduce.add (zext (vXi1 A to vXiN)))
20181// ->
20182// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
20183// FIXME: It's complicated to match all the variations of this after type
20184// legalization so we only handle the pre-type legalization pattern, but that
20185// requires the fixed vector type to be legal.
20186 static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
20187 const RISCVSubtarget &Subtarget) {
20188 unsigned Opc = N->getOpcode();
20189 assert((Opc == ISD::CTPOP || Opc == ISD::VECREDUCE_ADD) &&
20190 "Unexpected opcode");
20191 EVT VT = N->getValueType(0);
20192 if (!VT.isScalarInteger())
20193 return SDValue();
20194
20195 SDValue Src = N->getOperand(0);
20196
20197 if (Opc == ISD::CTPOP) {
20198 // Peek through zero_extend. It doesn't change the count.
20199 if (Src.getOpcode() == ISD::ZERO_EXTEND)
20200 Src = Src.getOperand(0);
20201
20202 if (Src.getOpcode() != ISD::BITCAST)
20203 return SDValue();
20204 Src = Src.getOperand(0);
20205 } else if (Opc == ISD::VECREDUCE_ADD) {
20206 if (Src.getOpcode() != ISD::ZERO_EXTEND)
20207 return SDValue();
20208 Src = Src.getOperand(0);
20209 }
20210
20211 EVT SrcEVT = Src.getValueType();
20212 if (!SrcEVT.isSimple())
20213 return SDValue();
20214
20215 MVT SrcMVT = SrcEVT.getSimpleVT();
20216 // Make sure the input is an i1 vector.
20217 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
20218 return SDValue();
20219
20220 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20221 if (!TLI.isTypeLegal(SrcMVT))
20222 return SDValue();
20223
20224 // Check that destination type is large enough to hold result without
20225 // overflow.
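// For example (illustrative): reducing a v64i1 vector yields at most 64,
// which needs Log2(64) + 1 = 7 bits, so an i8 or wider destination suffices.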
20226 if (Opc == ISD::VECREDUCE_ADD) {
20227 unsigned EltSize = SrcMVT.getScalarSizeInBits();
20228 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
20229 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
20230 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
20231 ? SrcMVT.getVectorNumElements()
20232 : RISCVTargetLowering::computeVLMAX(
20233 VectorBitsMax, EltSize, MinSize);
20234 if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
20235 return SDValue();
20236 }
20237
20238 MVT ContainerVT = SrcMVT;
20239 if (SrcMVT.isFixedLengthVector()) {
20240 ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
20241 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
20242 }
20243
20244 SDLoc DL(N);
20245 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
20246
20247 MVT XLenVT = Subtarget.getXLenVT();
20248 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
20249 return DAG.getZExtOrTrunc(Pop, DL, VT);
20250}
20251
20252 static SDValue performSHLCombine(SDNode *N,
20253 TargetLowering::DAGCombinerInfo &DCI,
20254 const RISCVSubtarget &Subtarget) {
20255 // (shl (zext x), y) -> (vwsll x, y)
20256 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20257 return V;
20258
20259 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
20260 // (shl (zext x), C) -> (vwmulu x, 1u << C)
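// For example (illustrative): (shl (sext v4i8 X to v4i16), splat 3) becomes
// (vwmulsu X, splat 8), provided 1 < C < 8 so the multiply stays a widening
// operation.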
20261
20262 if (!DCI.isAfterLegalizeDAG())
20263 return SDValue();
20264
20265 SDValue LHS = N->getOperand(0);
20266 if (!LHS.hasOneUse())
20267 return SDValue();
20268 unsigned Opcode;
20269 switch (LHS.getOpcode()) {
20270 case ISD::SIGN_EXTEND:
20271 case RISCVISD::VSEXT_VL:
20272 Opcode = RISCVISD::VWMULSU_VL;
20273 break;
20274 case ISD::ZERO_EXTEND:
20275 case RISCVISD::VZEXT_VL:
20276 Opcode = RISCVISD::VWMULU_VL;
20277 break;
20278 default:
20279 return SDValue();
20280 }
20281
20282 SDValue RHS = N->getOperand(1);
20283 APInt ShAmt;
20284 uint64_t ShAmtInt;
20285 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
20286 ShAmtInt = ShAmt.getZExtValue();
20287 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
20288 RHS.getOperand(1).getOpcode() == ISD::Constant)
20289 ShAmtInt = RHS.getConstantOperandVal(1);
20290 else
20291 return SDValue();
20292
20293 // Better foldings:
20294 // (shl (sext x), 1) -> (vwadd x, x)
20295 // (shl (zext x), 1) -> (vwaddu x, x)
20296 if (ShAmtInt <= 1)
20297 return SDValue();
20298
20299 SDValue NarrowOp = LHS.getOperand(0);
20300 MVT NarrowVT = NarrowOp.getSimpleValueType();
20301 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
20302 if (ShAmtInt >= NarrowBits)
20303 return SDValue();
20304 MVT VT = N->getSimpleValueType(0);
20305 if (NarrowBits * 2 != VT.getScalarSizeInBits())
20306 return SDValue();
20307
20308 SelectionDAG &DAG = DCI.DAG;
20309 SDLoc DL(N);
20310 SDValue Passthru, Mask, VL;
20311 switch (N->getOpcode()) {
20312 case ISD::SHL:
20313 Passthru = DAG.getUNDEF(VT);
20314 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
20315 break;
20316 case RISCVISD::SHL_VL:
20317 Passthru = N->getOperand(2);
20318 Mask = N->getOperand(3);
20319 VL = N->getOperand(4);
20320 break;
20321 default:
20322 llvm_unreachable("Expected SHL");
20323 }
20324 return DAG.getNode(Opcode, DL, VT, NarrowOp,
20325 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
20326 Passthru, Mask, VL);
20327}
20328
20329 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
20330 DAGCombinerInfo &DCI) const {
20331 SelectionDAG &DAG = DCI.DAG;
20332 const MVT XLenVT = Subtarget.getXLenVT();
20333 SDLoc DL(N);
20334
20335 // Helper to call SimplifyDemandedBits on an operand of N where only some low
20336 // bits are demanded. N will be added to the Worklist if it was not deleted.
20337 // Caller should return SDValue(N, 0) if this returns true.
20338 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
20339 SDValue Op = N->getOperand(OpNo);
20340 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
20341 if (!SimplifyDemandedBits(Op, Mask, DCI))
20342 return false;
20343
20344 if (N->getOpcode() != ISD::DELETED_NODE)
20345 DCI.AddToWorklist(N);
20346 return true;
20347 };
20348
20349 switch (N->getOpcode()) {
20350 default:
20351 break;
20352 case RISCVISD::SplitF64: {
20353 SDValue Op0 = N->getOperand(0);
20354 // If the input to SplitF64 is just BuildPairF64 then the operation is
20355 // redundant. Instead, use BuildPairF64's operands directly.
20356 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
20357 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
20358
20359 if (Op0->isUndef()) {
20360 SDValue Lo = DAG.getUNDEF(MVT::i32);
20361 SDValue Hi = DAG.getUNDEF(MVT::i32);
20362 return DCI.CombineTo(N, Lo, Hi);
20363 }
20364
20365 // It's cheaper to materialise two 32-bit integers than to load a double
20366 // from the constant pool and transfer it to integer registers through the
20367 // stack.
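// For example (illustrative): for the constant 1.0, bitcastToAPInt() is
// 0x3FF0000000000000, which splits into Lo = 0x00000000 and Hi = 0x3FF00000.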
20368 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
20369 APInt V = C->getValueAPF().bitcastToAPInt();
20370 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
20371 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
20372 return DCI.CombineTo(N, Lo, Hi);
20373 }
20374
20375 // This is a target-specific version of a DAGCombine performed in
20376 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20377 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20378 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20379 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20380 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
20381 break;
20382 SDValue NewSplitF64 =
20383 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
20384 Op0.getOperand(0));
20385 SDValue Lo = NewSplitF64.getValue(0);
20386 SDValue Hi = NewSplitF64.getValue(1);
20387 APInt SignBit = APInt::getSignMask(32);
20388 if (Op0.getOpcode() == ISD::FNEG) {
20389 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
20390 DAG.getConstant(SignBit, DL, MVT::i32));
20391 return DCI.CombineTo(N, Lo, NewHi);
20392 }
20393 assert(Op0.getOpcode() == ISD::FABS);
20394 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
20395 DAG.getConstant(~SignBit, DL, MVT::i32));
20396 return DCI.CombineTo(N, Lo, NewHi);
20397 }
20398 case RISCVISD::SLLW:
20399 case RISCVISD::SRAW:
20400 case RISCVISD::SRLW:
20401 case RISCVISD::RORW:
20402 case RISCVISD::ROLW: {
20403 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
20404 if (SimplifyDemandedLowBitsHelper(0, 32) ||
20405 SimplifyDemandedLowBitsHelper(1, 5))
20406 return SDValue(N, 0);
20407
20408 break;
20409 }
20410 case RISCVISD::ABSW:
20411 case RISCVISD::CLZW:
20412 case RISCVISD::CTZW: {
20413 // Only the lower 32 bits of the first operand are read
20414 if (SimplifyDemandedLowBitsHelper(0, 32))
20415 return SDValue(N, 0);
20416 break;
20417 }
20418 case RISCVISD::FMV_W_X_RV64: {
20419 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
20420 // conversion is unnecessary and can be replaced with the
20421 // FMV_X_ANYEXTW_RV64 operand.
20422 SDValue Op0 = N->getOperand(0);
20423 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
20424 return Op0.getOperand(0);
20425 break;
20426 }
20427 case RISCVISD::FMV_X_ANYEXTH:
20428 case RISCVISD::FMV_X_ANYEXTW_RV64: {
20429 SDLoc DL(N);
20430 SDValue Op0 = N->getOperand(0);
20431 MVT VT = N->getSimpleValueType(0);
20432
20433 // Constant fold.
20434 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
20435 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
20436 return DAG.getConstant(Val, DL, VT);
20437 }
20438
20439 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
20440 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
20441 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
20442 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
20443 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
20444 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
20445 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
20446 assert(Op0.getOperand(0).getValueType() == VT &&
20447 "Unexpected value type!");
20448 return Op0.getOperand(0);
20449 }
20450
20451 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
20452 cast<LoadSDNode>(Op0)->isSimple()) {
20453 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
20454 auto *LN0 = cast<LoadSDNode>(Op0);
20455 SDValue Load =
20456 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
20457 LN0->getBasePtr(), IVT, LN0->getMemOperand());
20458 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
20459 return Load;
20460 }
20461
20462 // This is a target-specific version of a DAGCombine performed in
20463 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20464 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20465 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20466 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20467 !Op0.getNode()->hasOneUse())
20468 break;
20469 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
20470 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
20471 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
20472 if (Op0.getOpcode() == ISD::FNEG)
20473 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
20474 DAG.getConstant(SignBit, DL, VT));
20475
20476 assert(Op0.getOpcode() == ISD::FABS);
20477 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
20478 DAG.getConstant(~SignBit, DL, VT));
20479 }
20480 case ISD::ABS: {
20481 EVT VT = N->getValueType(0);
20482 SDValue N0 = N->getOperand(0);
20483 // abs (sext) -> zext (abs)
20484 // abs (zext) -> zext (handled elsewhere)
20485 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
20486 SDValue Src = N0.getOperand(0);
20487 SDLoc DL(N);
20488 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
20489 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
20490 }
20491 break;
20492 }
20493 case ISD::ADD: {
20494 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20495 return V;
20496 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
20497 return V;
20498 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20499 return V;
20500 return performADDCombine(N, DCI, Subtarget);
20501 }
20502 case ISD::SUB: {
20503 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20504 return V;
20505 return performSUBCombine(N, DAG, Subtarget);
20506 }
20507 case ISD::AND:
20508 return performANDCombine(N, DCI, Subtarget);
20509 case ISD::OR: {
20510 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20511 return V;
20512 return performORCombine(N, DCI, Subtarget);
20513 }
20514 case ISD::XOR:
20515 return performXORCombine(N, DAG, Subtarget);
20516 case ISD::MUL:
20517 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20518 return V;
20519 return performMULCombine(N, DAG, DCI, Subtarget);
20520 case ISD::SDIV:
20521 case ISD::UDIV:
20522 case ISD::SREM:
20523 case ISD::UREM:
20524 if (SDValue V = combineBinOpOfZExt(N, DAG))
20525 return V;
20526 break;
20527 case ISD::FMUL: {
20528 using namespace SDPatternMatch;
20529 SDLoc DL(N);
20530 EVT VT = N->getValueType(0);
20531 SDValue X, Y;
20532 // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
20533 // hoistFNegAboveFMulFDiv.
20534 // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
20535 if (sd_match(N, m_FMul(m_Value(X), m_OneUse(m_FNeg(m_Value(Y))))))
20536 return DAG.getNode(ISD::FNEG, DL, VT,
20537 DAG.getNode(ISD::FMUL, DL, VT, X, Y));
20538
20539 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
20540 SDValue N0 = N->getOperand(0);
20541 SDValue N1 = N->getOperand(1);
20542 if (N0->getOpcode() != ISD::FCOPYSIGN)
20543 std::swap(N0, N1);
20544 if (N0->getOpcode() != ISD::FCOPYSIGN)
20545 return SDValue();
20546 ConstantFPSDNode *C = isConstOrConstSplatFP(N0->getOperand(0));
20547 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
20548 return SDValue();
20549 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
20550 return SDValue();
20551 SDValue Sign = N0->getOperand(1);
20552 if (Sign.getValueType() != VT)
20553 return SDValue();
20554 return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
20555 }
20556 case ISD::FADD:
20557 case ISD::UMAX:
20558 case ISD::UMIN:
20559 case ISD::SMAX:
20560 case ISD::SMIN:
20561 case ISD::FMAXNUM:
20562 case ISD::FMINNUM: {
20563 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
20564 return V;
20565 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
20566 return V;
20567 return SDValue();
20568 }
20569 case ISD::SETCC:
20570 return performSETCCCombine(N, DCI, Subtarget);
20571 case ISD::SIGN_EXTEND_INREG:
20572 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
20573 case ISD::ZERO_EXTEND:
20574 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
20575 // type legalization. This is safe because fp_to_uint produces poison if
20576 // it overflows.
20577 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
20578 SDValue Src = N->getOperand(0);
20579 if (Src.getOpcode() == ISD::FP_TO_UINT &&
20580 isTypeLegal(Src.getOperand(0).getValueType()))
20581 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
20582 Src.getOperand(0));
20583 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
20584 isTypeLegal(Src.getOperand(1).getValueType())) {
20585 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
20586 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
20587 Src.getOperand(0), Src.getOperand(1));
20588 DCI.CombineTo(N, Res);
20589 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
20590 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
20591 return SDValue(N, 0); // Return N so it doesn't get rechecked.
20592 }
20593 }
20594 return SDValue();
20595 case RISCVISD::TRUNCATE_VECTOR_VL:
20596 if (SDValue V = combineTruncOfSraSext(N, DAG))
20597 return V;
20598 return combineTruncToVnclip(N, DAG, Subtarget);
20599 case ISD::VP_TRUNCATE:
20600 return performVP_TRUNCATECombine(N, DAG, Subtarget);
20601 case ISD::TRUNCATE:
20602 return performTRUNCATECombine(N, DAG, Subtarget);
20603 case ISD::SELECT:
20604 return performSELECTCombine(N, DAG, Subtarget);
20605 case ISD::VSELECT:
20606 return performVSELECTCombine(N, DAG);
20607 case RISCVISD::CZERO_EQZ:
20608 case RISCVISD::CZERO_NEZ: {
20609 SDValue Val = N->getOperand(0);
20610 SDValue Cond = N->getOperand(1);
20611
20612 unsigned Opc = N->getOpcode();
20613
20614 // czero_eqz x, x -> x
20615 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
20616 return Val;
20617
20618 unsigned InvOpc =
20619 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
20620
20621 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
20622 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
20623 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
20624 SDValue NewCond = Cond.getOperand(0);
20625 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
20626 if (DAG.MaskedValueIsZero(NewCond, Mask))
20627 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
20628 }
20629 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
20630 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
20631 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
20632 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
20633 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
20634 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
20635 if (ISD::isIntEqualitySetCC(CCVal))
20636 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
20637 N->getValueType(0), Val, Cond.getOperand(0));
20638 }
20639 return SDValue();
20640 }
20641 case RISCVISD::SELECT_CC: {
20642 // Transform
20643 SDValue LHS = N->getOperand(0);
20644 SDValue RHS = N->getOperand(1);
20645 SDValue CC = N->getOperand(2);
20646 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
20647 SDValue TrueV = N->getOperand(3);
20648 SDValue FalseV = N->getOperand(4);
20649 SDLoc DL(N);
20650 EVT VT = N->getValueType(0);
20651
20652 // If the True and False values are the same, we don't need a select_cc.
20653 if (TrueV == FalseV)
20654 return TrueV;
20655
20656 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
20657 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
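// A minimal scalar sketch of the identity (XLEN = 64 assumed):
//   int64_t m = x >> 63;          // all-ones if x < 0, otherwise 0
//   result   = (m & (y - z)) + z; // y if x < 0, otherwise z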
20658 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
20659 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
20660 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
20661 if (CCVal == ISD::CondCode::SETGE)
20662 std::swap(TrueV, FalseV);
20663
20664 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
20665 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
20666 // Only handle simm12, if it is not in this range, it can be considered as
20667 // register.
20668 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
20669 isInt<12>(TrueSImm - FalseSImm)) {
20670 SDValue SRA =
20671 DAG.getNode(ISD::SRA, DL, VT, LHS,
20672 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
20673 SDValue AND =
20674 DAG.getNode(ISD::AND, DL, VT, SRA,
20675 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
20676 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
20677 }
20678
20679 if (CCVal == ISD::CondCode::SETGE)
20680 std::swap(TrueV, FalseV);
20681 }
20682
20683 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20684 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
20685 {LHS, RHS, CC, TrueV, FalseV});
20686
20687 if (!Subtarget.hasConditionalMoveFusion()) {
20688 // (select c, -1, y) -> -c | y
20689 if (isAllOnesConstant(TrueV)) {
20690 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20691 SDValue Neg = DAG.getNegative(C, DL, VT);
20692 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
20693 }
20694 // (select c, y, -1) -> -!c | y
20695 if (isAllOnesConstant(FalseV)) {
20696 SDValue C =
20697 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20698 SDValue Neg = DAG.getNegative(C, DL, VT);
20699 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
20700 }
20701
20702 // (select c, 0, y) -> -!c & y
20703 if (isNullConstant(TrueV)) {
20704 SDValue C =
20705 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
20706 SDValue Neg = DAG.getNegative(C, DL, VT);
20707 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
20708 }
20709 // (select c, y, 0) -> -c & y
20710 if (isNullConstant(FalseV)) {
20711 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
20712 SDValue Neg = DAG.getNegative(C, DL, VT);
20713 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
20714 }
20715 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
20716 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
20717 if (((isOneConstant(FalseV) && LHS == TrueV &&
20718 CCVal == ISD::CondCode::SETNE) ||
20719 (isOneConstant(TrueV) && LHS == FalseV &&
20720 CCVal == ISD::CondCode::SETEQ)) &&
20721 isNullConstant(RHS)) {
20722 // freeze it to be safe.
20723 LHS = DAG.getFreeze(LHS);
20724 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
20725 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
20726 }
20727 }
20728
20729 // If both true/false are an xor with 1, pull through the select.
20730 // This can occur after op legalization if both operands are setccs that
20731 // require an xor to invert.
20732 // FIXME: Generalize to other binary ops with identical operand?
20733 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
20734 TrueV.getOperand(1) == FalseV.getOperand(1) &&
20735 isOneConstant(TrueV.getOperand(1)) &&
20736 TrueV.hasOneUse() && FalseV.hasOneUse()) {
20737 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
20738 TrueV.getOperand(0), FalseV.getOperand(0));
20739 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
20740 }
20741
20742 return SDValue();
20743 }
20744 case RISCVISD::BR_CC: {
20745 SDValue LHS = N->getOperand(1);
20746 SDValue RHS = N->getOperand(2);
20747 SDValue CC = N->getOperand(3);
20748 SDLoc DL(N);
20749
20750 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
20751 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
20752 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
20753
20754 return SDValue();
20755 }
20756 case ISD::BITREVERSE:
20757 return performBITREVERSECombine(N, DAG, Subtarget);
20758 case ISD::FP_TO_SINT:
20759 case ISD::FP_TO_UINT:
20760 return performFP_TO_INTCombine(N, DCI, Subtarget);
20761 case ISD::FP_TO_SINT_SAT:
20762 case ISD::FP_TO_UINT_SAT:
20763 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
20764 case ISD::FCOPYSIGN: {
20765 EVT VT = N->getValueType(0);
20766 if (!VT.isVector())
20767 break;
20768 // There is a form of VFSGNJ which injects the negated sign of its second
20769 // operand. Try to bubble any FNEG up after the extend/round to produce
20770 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
20771 // TRUNC=1.
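// For example (illustrative):
//   (fcopysign X, (fp_extend (fneg Y))) -> (fcopysign X, (fneg (fp_extend Y)))
// so the sign operand carries an explicit fneg that can map onto vfsgnjn.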
20772 SDValue In2 = N->getOperand(1);
20773 // Avoid cases where the extend/round has multiple uses, as duplicating
20774 // those is typically more expensive than removing a fneg.
20775 if (!In2.hasOneUse())
20776 break;
20777 if (In2.getOpcode() != ISD::FP_EXTEND &&
20778 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
20779 break;
20780 In2 = In2.getOperand(0);
20781 if (In2.getOpcode() != ISD::FNEG)
20782 break;
20783 SDLoc DL(N);
20784 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
20785 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
20786 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
20787 }
20788 case ISD::MGATHER: {
20789 const auto *MGN = cast<MaskedGatherSDNode>(N);
20790 const EVT VT = N->getValueType(0);
20791 SDValue Index = MGN->getIndex();
20792 SDValue ScaleOp = MGN->getScale();
20793 ISD::MemIndexType IndexType = MGN->getIndexType();
20794 assert(!MGN->isIndexScaled() &&
20795 "Scaled gather/scatter should not be formed");
20796
20797 SDLoc DL(N);
20798 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20799 return DAG.getMaskedGather(
20800 N->getVTList(), MGN->getMemoryVT(), DL,
20801 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20802 MGN->getBasePtr(), Index, ScaleOp},
20803 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20804
20805 if (narrowIndex(Index, IndexType, DAG))
20806 return DAG.getMaskedGather(
20807 N->getVTList(), MGN->getMemoryVT(), DL,
20808 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
20809 MGN->getBasePtr(), Index, ScaleOp},
20810 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
20811
20812 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
20813 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
20814 // The sequence will be XLenVT, not the type of Index. Tell
20815 // isSimpleVIDSequence this so we avoid overflow.
20816 if (std::optional<VIDSequence> SimpleVID =
20817 isSimpleVIDSequence(Index, Subtarget.getXLen());
20818 SimpleVID && SimpleVID->StepDenominator == 1) {
20819 const int64_t StepNumerator = SimpleVID->StepNumerator;
20820 const int64_t Addend = SimpleVID->Addend;
20821
20822 // Note: We don't need to check alignment here since (by assumption
20823 // from the existence of the gather), our offsets must be sufficiently
20824 // aligned.
20825
20826 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
20827 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
20828 assert(IndexType == ISD::UNSIGNED_SCALED);
20829 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
20830 DAG.getSignedConstant(Addend, DL, PtrVT));
20831
20832 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
20833 VT.getVectorElementCount());
20834 SDValue StridedLoad = DAG.getStridedLoadVP(
20835 VT, DL, MGN->getChain(), BasePtr,
20836 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
20837 EVL, MGN->getMemOperand());
20838 SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
20839 MGN->getPassThru());
20840 return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
20841 DL);
20842 }
20843 }
20844
20845 SmallVector<int> ShuffleMask;
20846 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20847 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
20848 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
20849 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
20850 MGN->getMask(), DAG.getUNDEF(VT),
20851 MGN->getMemoryVT(), MGN->getMemOperand(),
20852 ISD::UNINDEXED, ISD::NON_EXTLOAD);
20853 SDValue Shuffle =
20854 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
20855 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
20856 }
20857
20858 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
20859 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
20860 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
20861 SmallVector<SDValue> NewIndices;
20862 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
20863 NewIndices.push_back(Index.getOperand(i));
20864 EVT IndexVT = Index.getValueType()
20865 .getHalfNumVectorElementsVT(*DAG.getContext());
20866 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
20867
20868 unsigned ElementSize = VT.getScalarStoreSize();
20869 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
20870 auto EltCnt = VT.getVectorElementCount();
20871 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
20872 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
20873 EltCnt.divideCoefficientBy(2));
20874 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
20875 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
20876 EltCnt.divideCoefficientBy(2));
20877 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
20878
20879 SDValue Gather =
20880 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
20881 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
20882 Index, ScaleOp},
20883 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
20884 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
20885 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
20886 }
20887 break;
20888 }
20889 case ISD::MSCATTER: {
20890 const auto *MSN = cast<MaskedScatterSDNode>(N);
20891 SDValue Index = MSN->getIndex();
20892 SDValue ScaleOp = MSN->getScale();
20893 ISD::MemIndexType IndexType = MSN->getIndexType();
20894 assert(!MSN->isIndexScaled() &&
20895 "Scaled gather/scatter should not be formed");
20896
20897 SDLoc DL(N);
20898 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20899 return DAG.getMaskedScatter(
20900 N->getVTList(), MSN->getMemoryVT(), DL,
20901 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20902 Index, ScaleOp},
20903 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20904
20905 if (narrowIndex(Index, IndexType, DAG))
20906 return DAG.getMaskedScatter(
20907 N->getVTList(), MSN->getMemoryVT(), DL,
20908 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
20909 Index, ScaleOp},
20910 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
20911
20912 EVT VT = MSN->getValue()->getValueType(0);
20913 SmallVector<int> ShuffleMask;
20914 if (!MSN->isTruncatingStore() &&
20915 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
20916 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
20917 DAG.getUNDEF(VT), ShuffleMask);
20918 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
20919 DAG.getUNDEF(XLenVT), MSN->getMask(),
20920 MSN->getMemoryVT(), MSN->getMemOperand(),
20921 ISD::UNINDEXED, false);
20922 }
20923 break;
20924 }
20925 case ISD::VP_GATHER: {
20926 const auto *VPGN = cast<VPGatherSDNode>(N);
20927 SDValue Index = VPGN->getIndex();
20928 SDValue ScaleOp = VPGN->getScale();
20929 ISD::MemIndexType IndexType = VPGN->getIndexType();
20930 assert(!VPGN->isIndexScaled() &&
20931 "Scaled gather/scatter should not be formed");
20932
20933 SDLoc DL(N);
20934 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20935 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20936 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20937 ScaleOp, VPGN->getMask(),
20938 VPGN->getVectorLength()},
20939 VPGN->getMemOperand(), IndexType);
20940
20941 if (narrowIndex(Index, IndexType, DAG))
20942 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
20943 {VPGN->getChain(), VPGN->getBasePtr(), Index,
20944 ScaleOp, VPGN->getMask(),
20945 VPGN->getVectorLength()},
20946 VPGN->getMemOperand(), IndexType);
20947
20948 break;
20949 }
20950 case ISD::VP_SCATTER: {
20951 const auto *VPSN = cast<VPScatterSDNode>(N);
20952 SDValue Index = VPSN->getIndex();
20953 SDValue ScaleOp = VPSN->getScale();
20954 ISD::MemIndexType IndexType = VPSN->getIndexType();
20955 assert(!VPSN->isIndexScaled() &&
20956 "Scaled gather/scatter should not be formed");
20957
20958 SDLoc DL(N);
20959 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
20960 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20961 {VPSN->getChain(), VPSN->getValue(),
20962 VPSN->getBasePtr(), Index, ScaleOp,
20963 VPSN->getMask(), VPSN->getVectorLength()},
20964 VPSN->getMemOperand(), IndexType);
20965
20966 if (narrowIndex(Index, IndexType, DAG))
20967 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
20968 {VPSN->getChain(), VPSN->getValue(),
20969 VPSN->getBasePtr(), Index, ScaleOp,
20970 VPSN->getMask(), VPSN->getVectorLength()},
20971 VPSN->getMemOperand(), IndexType);
20972 break;
20973 }
20974 case RISCVISD::SHL_VL:
20975 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
20976 return V;
20977 [[fallthrough]];
20978 case RISCVISD::SRA_VL:
20979 case RISCVISD::SRL_VL: {
20980 SDValue ShAmt = N->getOperand(1);
20981 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
20982 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
20983 SDLoc DL(N);
20984 SDValue VL = N->getOperand(4);
20985 EVT VT = N->getValueType(0);
20986 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
20987 ShAmt.getOperand(1), VL);
20988 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
20989 N->getOperand(2), N->getOperand(3), N->getOperand(4));
20990 }
20991 break;
20992 }
20993 case ISD::SRA:
20994 if (SDValue V = performSRACombine(N, DAG, Subtarget))
20995 return V;
20996 [[fallthrough]];
20997 case ISD::SRL:
20998 case ISD::SHL: {
20999 if (N->getOpcode() == ISD::SHL) {
21000 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
21001 return V;
21002 }
21003 SDValue ShAmt = N->getOperand(1);
21004 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
21005 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
21006 SDLoc DL(N);
21007 EVT VT = N->getValueType(0);
21008 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
21009 ShAmt.getOperand(1),
21010 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
21011 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
21012 }
21013 break;
21014 }
21015 case RISCVISD::ADD_VL:
21016 if (SDValue V = simplifyOp_VL(N))
21017 return V;
21018 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21019 return V;
21020 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
21021 return V;
21022 return combineToVWMACC(N, DAG, Subtarget);
21023 case RISCVISD::VWADD_W_VL:
21024 case RISCVISD::VWADDU_W_VL:
21025 case RISCVISD::VWSUB_W_VL:
21026 case RISCVISD::VWSUBU_W_VL:
21027 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
21028 case RISCVISD::OR_VL:
21029 case RISCVISD::SUB_VL:
21030 case RISCVISD::MUL_VL:
21031 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
21032 case RISCVISD::VFMADD_VL:
21033 case RISCVISD::VFNMADD_VL:
21034 case RISCVISD::VFMSUB_VL:
21035 case RISCVISD::VFNMSUB_VL:
21036 case RISCVISD::STRICT_VFMADD_VL:
21037 case RISCVISD::STRICT_VFNMADD_VL:
21038 case RISCVISD::STRICT_VFMSUB_VL:
21039 case RISCVISD::STRICT_VFNMSUB_VL:
21040 return performVFMADD_VLCombine(N, DCI, Subtarget);
21041 case RISCVISD::FADD_VL:
21042 case RISCVISD::FSUB_VL:
21043 case RISCVISD::FMUL_VL:
21044 case RISCVISD::VFWADD_W_VL:
21045 case RISCVISD::VFWSUB_W_VL:
21046 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
21047 case ISD::LOAD:
21048 case ISD::STORE: {
21049 if (DCI.isAfterLegalizeDAG())
21050 if (SDValue V = performMemPairCombine(N, DCI))
21051 return V;
21052
21053 if (N->getOpcode() != ISD::STORE)
21054 break;
21055
21056 auto *Store = cast<StoreSDNode>(N);
21057 SDValue Chain = Store->getChain();
21058 EVT MemVT = Store->getMemoryVT();
21059 SDValue Val = Store->getValue();
21060 SDLoc DL(N);
21061
21062 bool IsScalarizable =
21063 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
21064 Store->isSimple() &&
21065 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
21066 isPowerOf2_64(MemVT.getSizeInBits()) &&
21067 MemVT.getSizeInBits() <= Subtarget.getXLen();
21068
21069 // If sufficiently aligned we can scalarize stores of constant vectors of
21070 // any power-of-two size up to XLen bits, provided that they aren't too
21071 // expensive to materialize.
21072 // vsetivli zero, 2, e8, m1, ta, ma
21073 // vmv.v.i v8, 4
21074 // vse64.v v8, (a0)
21075 // ->
21076 // li a1, 1028
21077 // sh a1, 0(a0)
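// (Here the two i8 lanes of value 4 pack into 0x0404 = 1028 as an i16; the
// same constant-packing is done in general below.)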
21078 if (DCI.isBeforeLegalize() && IsScalarizable &&
21079 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
21080 // Get the constant vector bits
21081 APInt NewC(Val.getValueSizeInBits(), 0);
21082 uint64_t EltSize = Val.getScalarValueSizeInBits();
21083 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
21084 if (Val.getOperand(i).isUndef())
21085 continue;
21086 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
21087 i * EltSize);
21088 }
21089 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
21090
21091 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
21092 true) <= 2 &&
21093 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
21094 NewVT, *Store->getMemOperand())) {
21095 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
21096 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
21097 Store->getPointerInfo(), Store->getBaseAlign(),
21098 Store->getMemOperand()->getFlags());
21099 }
21100 }
21101
21102 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
21103 // vsetivli zero, 2, e16, m1, ta, ma
21104 // vle16.v v8, (a0)
21105 // vse16.v v8, (a1)
21106 if (auto *L = dyn_cast<LoadSDNode>(Val);
21107 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
21108 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
21109 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
21110 L->getMemoryVT() == MemVT) {
21111 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
21112 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
21113 NewVT, *Store->getMemOperand()) &&
21114 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
21115 NewVT, *L->getMemOperand())) {
21116 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
21117 L->getPointerInfo(), L->getBaseAlign(),
21118 L->getMemOperand()->getFlags());
21119 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
21120 Store->getPointerInfo(), Store->getBaseAlign(),
21121 Store->getMemOperand()->getFlags());
21122 }
21123 }
21124
21125 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
21126 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
21127 // any illegal types.
21128 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
21129 (DCI.isAfterLegalizeDAG() &&
21130 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21131 isNullConstant(Val.getOperand(1)))) &&
21132 Val.hasOneUse()) {
21133 SDValue Src = Val.getOperand(0);
21134 MVT VecVT = Src.getSimpleValueType();
21135 // VecVT should be scalable and memory VT should match the element type.
21136 if (!Store->isIndexed() && VecVT.isScalableVector() &&
21137 MemVT == VecVT.getVectorElementType()) {
21138 SDLoc DL(N);
21139 MVT MaskVT = getMaskTypeFor(VecVT);
21140 return DAG.getStoreVP(
21141 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
21142 DAG.getConstant(1, DL, MaskVT),
21143 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
21144 Store->getMemOperand(), Store->getAddressingMode(),
21145 Store->isTruncatingStore(), /*IsCompress*/ false);
21146 }
21147 }
21148
21149 break;
21150 }
21151 case ISD::SPLAT_VECTOR: {
21152 EVT VT = N->getValueType(0);
21153 // Only perform this combine on legal MVT types.
21154 if (!isTypeLegal(VT))
21155 break;
21156 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
21157 DAG, Subtarget))
21158 return Gather;
21159 break;
21160 }
21161 case ISD::BUILD_VECTOR:
21162 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
21163 return V;
21164 break;
21165 case ISD::CONCAT_VECTORS:
21166 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
21167 return V;
21168 break;
21169 case ISD::VECTOR_SHUFFLE:
21170 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
21171 return V;
21172 break;
21173 case ISD::INSERT_VECTOR_ELT:
21174 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
21175 return V;
21176 break;
21177 case RISCVISD::VFMV_V_F_VL: {
21178 const MVT VT = N->getSimpleValueType(0);
21179 SDValue Passthru = N->getOperand(0);
21180 SDValue Scalar = N->getOperand(1);
21181 SDValue VL = N->getOperand(2);
21182
21183 // If VL is 1, we can use vfmv.s.f.
21184 if (isOneConstant(VL))
21185 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
21186 break;
21187 }
21188 case RISCVISD::VMV_V_X_VL: {
21189 const MVT VT = N->getSimpleValueType(0);
21190 SDValue Passthru = N->getOperand(0);
21191 SDValue Scalar = N->getOperand(1);
21192 SDValue VL = N->getOperand(2);
21193
21194 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
21195 // scalar input.
21196 unsigned ScalarSize = Scalar.getValueSizeInBits();
21197 unsigned EltWidth = VT.getScalarSizeInBits();
21198 if (ScalarSize > EltWidth && Passthru.isUndef())
21199 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
21200 return SDValue(N, 0);
21201
21202 // If VL is 1 and the scalar value won't benefit from an immediate, we can
21203 // use vmv.s.x.
21204 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
21205 if (isOneConstant(VL) &&
21206 (!Const || Const->isZero() ||
21207 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
21208 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
21209
21210 break;
21211 }
21212 case RISCVISD::VFMV_S_F_VL: {
21213 SDValue Src = N->getOperand(1);
21214 // Try to remove vector->scalar->vector if the scalar->vector is inserting
21215 // into an undef vector.
21216 // TODO: Could use a vslide or vmv.v.v for non-undef.
21217 if (N->getOperand(0).isUndef() &&
21218 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21219 isNullConstant(Src.getOperand(1)) &&
21220 Src.getOperand(0).getValueType().isScalableVector()) {
21221 EVT VT = N->getValueType(0);
21222 SDValue EVSrc = Src.getOperand(0);
21223 EVT EVSrcVT = EVSrc.getValueType();
21225 // Widths match, just return the original vector.
21226 if (EVSrcVT == VT)
21227 return EVSrc;
21228 SDLoc DL(N);
21229 // The source is narrower, so use insert_subvector.
21230 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
21231 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
21232 EVSrc,
21233 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
21234 }
21235 // The source is wider, so use extract_subvector.
21236 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
21237 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
21238 }
21239 [[fallthrough]];
21240 }
21241 case RISCVISD::VMV_S_X_VL: {
21242 const MVT VT = N->getSimpleValueType(0);
21243 SDValue Passthru = N->getOperand(0);
21244 SDValue Scalar = N->getOperand(1);
21245 SDValue VL = N->getOperand(2);
21246
21247 // The vmv.s.x instruction copies the scalar integer register to element 0
21248 // of the destination vector register. If SEW < XLEN, the least-significant
21249 // bits are copied and the upper XLEN-SEW bits are ignored.
21250 unsigned ScalarSize = Scalar.getValueSizeInBits();
21251 unsigned EltWidth = VT.getScalarSizeInBits();
21252 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
21253 return SDValue(N, 0);
21254
21255 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
21256 Scalar.getOperand(0).getValueType() == N->getValueType(0))
21257 return Scalar.getOperand(0);
21258
21259 // Use M1 or smaller to avoid over-constraining register allocation.
21260 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
21261 if (M1VT.bitsLT(VT)) {
21262 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
21263 SDValue Result =
21264 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
21265 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
21266 return Result;
21267 }
21268
21269 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
21270 // higher would involve overly constraining the register allocator for
21271 // no purpose.
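// For example (sketch): (VMV_S_X_VL undef, 3, VL) with SEW=32 and LMUL<=1 can
// be emitted as "vmv.v.i v8, 3" instead of "li a0, 3; vmv.s.x v8, a0".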
21272 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
21273 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
21274 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
21275 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
21276
21277 break;
21278 }
21279 case RISCVISD::VMV_X_S: {
21280 SDValue Vec = N->getOperand(0);
21281 MVT VecVT = N->getOperand(0).getSimpleValueType();
21282 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
21283 if (M1VT.bitsLT(VecVT)) {
21284 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
21285 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
21286 }
21287 break;
21288 }
21289 case ISD::INTRINSIC_VOID:
21290 case ISD::INTRINSIC_W_CHAIN:
21291 case ISD::INTRINSIC_WO_CHAIN: {
21292 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
21293 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
21294 switch (IntNo) {
21295 // By default we do not combine any intrinsic.
21296 default:
21297 return SDValue();
21298 case Intrinsic::riscv_vcpop:
21299 case Intrinsic::riscv_vcpop_mask:
21300 case Intrinsic::riscv_vfirst:
21301 case Intrinsic::riscv_vfirst_mask: {
21302 SDValue VL = N->getOperand(2);
21303 if (IntNo == Intrinsic::riscv_vcpop_mask ||
21304 IntNo == Intrinsic::riscv_vfirst_mask)
21305 VL = N->getOperand(3);
21306 if (!isNullConstant(VL))
21307 return SDValue();
21308 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
21309 SDLoc DL(N);
21310 EVT VT = N->getValueType(0);
21311 if (IntNo == Intrinsic::riscv_vfirst ||
21312 IntNo == Intrinsic::riscv_vfirst_mask)
21313 return DAG.getAllOnesConstant(DL, VT);
21314 return DAG.getConstant(0, DL, VT);
21315 }
21316 case Intrinsic::riscv_vsseg2_mask:
21317 case Intrinsic::riscv_vsseg3_mask:
21318 case Intrinsic::riscv_vsseg4_mask:
21319 case Intrinsic::riscv_vsseg5_mask:
21320 case Intrinsic::riscv_vsseg6_mask:
21321 case Intrinsic::riscv_vsseg7_mask:
21322 case Intrinsic::riscv_vsseg8_mask: {
21323 SDValue Tuple = N->getOperand(2);
21324 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21325
21326 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
21327 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
21328 !Tuple.getOperand(0).isUndef())
21329 return SDValue();
21330
21331 SDValue Val = Tuple.getOperand(1);
21332 unsigned Idx = Tuple.getConstantOperandVal(2);
21333
21334 unsigned SEW = Val.getValueType().getScalarSizeInBits();
21335 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
21336 "Type mismatch without bitcast?");
21337 unsigned Stride = SEW / 8 * NF;
21338 unsigned Offset = SEW / 8 * Idx;
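// E.g. (sketch) for a 3-field tuple of e32 elements (NF=3, SEW=32):
// Stride = 4 * 3 = 12 bytes and, for field Idx=1, Offset = 4 bytes, so the
// single live field is stored with a strided vsse32 at base+4, stride 12.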
21339
21340 SDValue Ops[] = {
21341 /*Chain=*/N->getOperand(0),
21342 /*IntID=*/
21343 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
21344 /*StoredVal=*/Val,
21345 /*Ptr=*/
21346 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
21347 DAG.getConstant(Offset, DL, XLenVT)),
21348 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21349 /*Mask=*/N->getOperand(4),
21350 /*VL=*/N->getOperand(5)};
21351
21352 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
21353 // Match getTgtMemIntrinsic for the non-unit-stride case.
21354 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
21355 MachineFunction &MF = DAG.getMachineFunction();
21356 MachineMemOperand *MMO = MF.getMachineMemOperand(
21357 OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21358
21359 SDVTList VTs = DAG.getVTList(MVT::Other);
21360 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
21361 MMO);
21362 }
21363 }
21364 }
21365 case ISD::EXPERIMENTAL_VP_REVERSE:
21366 return performVP_REVERSECombine(N, DAG, Subtarget);
21367 case ISD::VP_STORE:
21368 return performVP_STORECombine(N, DAG, Subtarget);
21369 case ISD::BITCAST: {
21370 assert(Subtarget.useRVVForFixedLengthVectors());
21371 SDValue N0 = N->getOperand(0);
21372 EVT VT = N->getValueType(0);
21373 EVT SrcVT = N0.getValueType();
21374 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
21375 unsigned NF = VT.getRISCVVectorTupleNumFields();
21376 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
21377 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
21378 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
21379
21380 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
21381
21382 SDValue Result = DAG.getUNDEF(VT);
21383 for (unsigned i = 0; i < NF; ++i)
21384 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
21385 DAG.getTargetConstant(i, DL, MVT::i32));
21386 return Result;
21387 }
21388 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
21389 // type, widen both sides to avoid a trip through memory.
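// For example (sketch): (i2 (bitcast v2i1 X)) becomes
// (i2 (trunc (i8 (bitcast (concat_vectors X, undef, undef, undef))))),
// keeping the value in registers throughout.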
21390 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
21391 VT.isScalarInteger()) {
21392 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
21393 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
21394 Ops[0] = N0;
21395 SDLoc DL(N);
21396 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
21397 N0 = DAG.getBitcast(MVT::i8, N0);
21398 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
21399 }
21400
21401 return SDValue();
21402 }
21403 case ISD::VECREDUCE_ADD:
21404 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
21405 return V;
21406 [[fallthrough]];
21407 case ISD::CTPOP:
21408 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
21409 return V;
21410 break;
21411 case RISCVISD::VRGATHER_VX_VL: {
21412 // Note this assumes that out-of-bounds indices produce poison
21413 // and can thus be replaced without having to prove them in bounds.
21414 EVT VT = N->getValueType(0);
21415 SDValue Src = N->getOperand(0);
21416 SDValue Idx = N->getOperand(1);
21417 SDValue Passthru = N->getOperand(2);
21418 SDValue VL = N->getOperand(4);
21419
21420 // Warning: Unlike most cases where we strip an insert_subvector, this one
21421 // does not require the first operand to be undef.
21422 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
21423 isNullConstant(Src.getOperand(2)))
21424 Src = Src.getOperand(1);
21425
21426 switch (Src.getOpcode()) {
21427 default:
21428 break;
21429 case RISCVISD::VMV_V_X_VL:
21430 case RISCVISD::VFMV_V_F_VL:
21431 // Drop a redundant vrgather_vx.
21432 // TODO: Remove the type restriction if we find a motivating
21433 // test case?
21434 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
21435 Src.getValueType() == VT)
21436 return Src;
21437 break;
21438 case RISCVISD::VMV_S_X_VL:
21439 case RISCVISD::VFMV_S_F_VL:
21440 // If this use only demands lane zero from the source vmv.s.x, and
21441 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
21442 // a vmv.v.x. Note that there can be other uses of the original
21443 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
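// For example (sketch): (vrgather_vx (vmv_s_x undef, a0, VL), 0, undef, VL)
// broadcasts element 0, so it can simply splat a0 via (vmv_v_x undef, a0, VL).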
21444 if (isNullConstant(Idx) && Passthru.isUndef() &&
21445 VL == Src.getOperand(2)) {
21446 unsigned Opc =
21447 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
21448 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
21449 VL);
21450 }
21451 break;
21452 }
21453 break;
21454 }
21455 case RISCVISD::TUPLE_EXTRACT: {
21456 EVT VT = N->getValueType(0);
21457 SDValue Tuple = N->getOperand(0);
21458 unsigned Idx = N->getConstantOperandVal(1);
21459 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
21460 break;
21461
21462 unsigned NF = 0;
21463 switch (Tuple.getConstantOperandVal(1)) {
21464 default:
21465 break;
21466 case Intrinsic::riscv_vlseg2_mask:
21467 case Intrinsic::riscv_vlseg3_mask:
21468 case Intrinsic::riscv_vlseg4_mask:
21469 case Intrinsic::riscv_vlseg5_mask:
21470 case Intrinsic::riscv_vlseg6_mask:
21471 case Intrinsic::riscv_vlseg7_mask:
21472 case Intrinsic::riscv_vlseg8_mask:
21473 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21474 break;
21475 }
21476
21477 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
21478 break;
21479
21480 unsigned SEW = VT.getScalarSizeInBits();
21481 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
21482 "Type mismatch without bitcast?");
21483 unsigned Stride = SEW / 8 * NF;
21484 unsigned Offset = SEW / 8 * Idx;
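// E.g. (sketch) for NF=4 fields of e16 elements (SEW=16): Stride = 2 * 4 = 8
// bytes, and extracting field Idx=2 becomes a strided vlse16 from base+4 with
// an 8-byte stride.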
21485
21486 SDValue Ops[] = {
21487 /*Chain=*/Tuple.getOperand(0),
21488 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
21489 /*Passthru=*/Tuple.getOperand(2),
21490 /*Ptr=*/
21491 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
21492 DAG.getConstant(Offset, DL, XLenVT)),
21493 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21494 /*Mask=*/Tuple.getOperand(4),
21495 /*VL=*/Tuple.getOperand(5),
21496 /*Policy=*/Tuple.getOperand(6)};
21497
21498 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
21499 // Match getTgtMemIntrinsic for the non-unit-stride case.
21500 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
21501 MachineFunction &MF = DAG.getMachineFunction();
21502 MachineMemOperand *MMO = MF.getMachineMemOperand(
21503 TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21504
21505 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
21506 SDValue Result = DAG.