1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
39#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/IntrinsicsRISCV.h"
46#include "llvm/Support/Debug.h"
52#include <optional>
53
54using namespace llvm;
55
56#define DEBUG_TYPE "riscv-lower"
57
58STATISTIC(NumTailCalls, "Number of tail calls");
59
61 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
62 cl::desc("Give the maximum size (in number of nodes) of the web of "
63 "instructions that we will consider for VW expansion"),
64 cl::init(18));
65
66static cl::opt<bool>
67 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
68 cl::desc("Allow the formation of VW_W operations (e.g., "
69 "VWADD_W) with splat constants"),
70 cl::init(false));
71
73 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
74 cl::desc("Set the minimum number of repetitions of a divisor to allow "
75 "transformation to multiplications by the reciprocal"),
76 cl::init(2));
77
78static cl::opt<int>
80 cl::desc("Give the maximum number of instructions that we will "
81 "use for creating a floating-point immediate value"),
82 cl::init(3));
83
84static cl::opt<bool>
85 ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden,
86 cl::desc("Swap add and addi in cases where the add may "
87 "be combined with a shift"),
88 cl::init(true));
89
90// TODO: Support more ops
91static const unsigned ZvfbfaVPOps[] = {
92 ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
96
98 const RISCVSubtarget &STI)
99 : TargetLowering(TM, STI), Subtarget(STI) {
100
101 RISCVABI::ABI ABI = Subtarget.getTargetABI();
102 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
103
104 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
105 !Subtarget.hasStdExtF()) {
106 errs() << "Hard-float 'f' ABI can't be used for a target that "
107 "doesn't support the F instruction set extension (ignoring "
108 "target-abi)\n";
109 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
110 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
111 !Subtarget.hasStdExtD()) {
112 errs() << "Hard-float 'd' ABI can't be used for a target that "
113 "doesn't support the D instruction set extension (ignoring "
114 "target-abi)\n";
115 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
116 }
117
118 switch (ABI) {
119 default:
120 reportFatalUsageError("Don't know how to lower this ABI");
129 break;
130 }
131
132 MVT XLenVT = Subtarget.getXLenVT();
133
134 // Set up the register classes.
135 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
136
137 if (Subtarget.hasStdExtZfhmin())
138 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
139 if (Subtarget.hasStdExtZfbfmin() || Subtarget.hasVendorXAndesBFHCvt())
140 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
141 if (Subtarget.hasStdExtF())
142 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
143 if (Subtarget.hasStdExtD())
144 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
145 if (Subtarget.hasStdExtZhinxmin())
146 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
147 if (Subtarget.hasStdExtZfinx())
148 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
149 if (Subtarget.hasStdExtZdinx()) {
150 if (Subtarget.is64Bit())
151 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
152 else
153 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
154 }
155
156 static const MVT::SimpleValueType BoolVecVTs[] = {
157 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
158 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
159 static const MVT::SimpleValueType IntVecVTs[] = {
160 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
161 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
162 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
163 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
164 MVT::nxv4i64, MVT::nxv8i64};
165 static const MVT::SimpleValueType F16VecVTs[] = {
166 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
167 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
168 static const MVT::SimpleValueType BF16VecVTs[] = {
169 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
170 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
171 static const MVT::SimpleValueType F32VecVTs[] = {
172 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
173 static const MVT::SimpleValueType F64VecVTs[] = {
174 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
175 static const MVT::SimpleValueType VecTupleVTs[] = {
176 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
177 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
178 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
179 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
180 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
181 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
182 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
183 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
184 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
185 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
186 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
187
188 if (Subtarget.hasVInstructions()) {
189 auto addRegClassForRVV = [this](MVT VT) {
190 // Disable the smallest fractional LMUL types if ELEN is less than
191 // RVVBitsPerBlock.
192 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
193 if (VT.getVectorMinNumElements() < MinElts)
194 return;
195
196 unsigned Size = VT.getSizeInBits().getKnownMinValue();
197 const TargetRegisterClass *RC;
199 RC = &RISCV::VRRegClass;
200 else if (Size == 2 * RISCV::RVVBitsPerBlock)
201 RC = &RISCV::VRM2RegClass;
202 else if (Size == 4 * RISCV::RVVBitsPerBlock)
203 RC = &RISCV::VRM4RegClass;
204 else if (Size == 8 * RISCV::RVVBitsPerBlock)
205 RC = &RISCV::VRM8RegClass;
206 else
207 llvm_unreachable("Unexpected size");
208
209 addRegisterClass(VT, RC);
210 };
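  // For illustration: under Zve32x (ELen = 32), MinElts is
  // RVVBitsPerBlock / 32 = 2, so nxv1i8 (one element known-min, i.e.
  // LMUL = 1/8) is skipped, while nxv8i8 (64 bits known-min, one full
  // block) maps to VR and nxv16i16 (256 bits known-min, four blocks)
  // maps to VRM4.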
211
212 for (MVT VT : BoolVecVTs)
213 addRegClassForRVV(VT);
214 for (MVT VT : IntVecVTs) {
215 if (VT.getVectorElementType() == MVT::i64 &&
216 !Subtarget.hasVInstructionsI64())
217 continue;
218 addRegClassForRVV(VT);
219 }
220
221 if (Subtarget.hasVInstructionsF16Minimal() ||
222 Subtarget.hasVendorXAndesVPackFPH())
223 for (MVT VT : F16VecVTs)
224 addRegClassForRVV(VT);
225
226 if (Subtarget.hasVInstructionsBF16Minimal() ||
227 Subtarget.hasVendorXAndesVBFHCvt())
228 for (MVT VT : BF16VecVTs)
229 addRegClassForRVV(VT);
230
231 if (Subtarget.hasVInstructionsF32())
232 for (MVT VT : F32VecVTs)
233 addRegClassForRVV(VT);
234
235 if (Subtarget.hasVInstructionsF64())
236 for (MVT VT : F64VecVTs)
237 addRegClassForRVV(VT);
238
239 if (Subtarget.useRVVForFixedLengthVectors()) {
240 auto addRegClassForFixedVectors = [this](MVT VT) {
241 MVT ContainerVT = getContainerForFixedLengthVector(VT);
242 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
243 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
244 addRegisterClass(VT, TRI.getRegClass(RCID));
245 };
247 if (useRVVForFixedLengthVectorVT(VT))
248 addRegClassForFixedVectors(VT);
249
251 if (useRVVForFixedLengthVectorVT(VT))
252 addRegClassForFixedVectors(VT);
253 }
254
255 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
256 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
257 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
258 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
259 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
260 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
261 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
262 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
263 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
264 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
265 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
266 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
267 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
268 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
269 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
270 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
271 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
272 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
273 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
274 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
275 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
276 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
277 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
278 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
279 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
280 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
281 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
282 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
283 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
284 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
285 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
286 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
287 }
288
289 // Fixed-length vectors are stored in GPRs for P extension packed operations.
290 if (Subtarget.enablePExtCodeGen()) {
291 if (Subtarget.is64Bit()) {
292 addRegisterClass(MVT::v2i32, &RISCV::GPRRegClass);
293 addRegisterClass(MVT::v4i16, &RISCV::GPRRegClass);
294 addRegisterClass(MVT::v8i8, &RISCV::GPRRegClass);
295 } else {
296 addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass);
297 addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass);
298 }
299 }
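  // For example, with the P extension a v4i16 value lives in a single 64-bit
  // GPR on RV64 (four 16-bit lanes) and a v2i16 value lives in one 32-bit GPR
  // on RV32; no vector register file is involved for these types.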
300
301 // Compute derived properties from the register classes.
303
305
307 MVT::i1, Promote);
308 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
310 MVT::i1, Promote);
311
312 // TODO: add all necessary setOperationAction calls.
314
319
324 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
327 }
328
330
333
334 if (!Subtarget.hasVendorXTHeadBb() && !Subtarget.hasVendorXqcibm() &&
335 !Subtarget.hasVendorXAndesPerf())
337
339
340 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
341 !Subtarget.hasVendorXqcibm() && !Subtarget.hasVendorXAndesPerf() &&
342 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
343 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
344
345 if (Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit()) {
348 }
349
350 if (Subtarget.is64Bit()) {
352
355 MVT::i32, Custom);
357 if (!Subtarget.hasStdExtZbb())
360 Custom);
362 }
363 if (!Subtarget.hasStdExtZmmul()) {
365 } else if (Subtarget.is64Bit()) {
368 } else {
370 }
371
372 if (!Subtarget.hasStdExtM()) {
374 Expand);
375 } else if (Subtarget.is64Bit()) {
377 {MVT::i8, MVT::i16, MVT::i32}, Custom);
378 }
379
382 Expand);
383
385 Custom);
386
387 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
388 if (Subtarget.is64Bit())
390 } else if (Subtarget.hasVendorXTHeadBb()) {
391 if (Subtarget.is64Bit())
394 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
396 } else {
398 }
399
401 Subtarget.hasREV8Like() ? Legal : Expand);
402
403 if ((Subtarget.hasVendorXCVbitmanip() || Subtarget.hasVendorXqcibm()) &&
404 !Subtarget.is64Bit()) {
406 } else {
407 // Zbkb can use rev8+brev8 to implement bitreverse.
409 Subtarget.hasStdExtZbkb() ? Custom : Expand);
410 if (Subtarget.hasStdExtZbkb())
412 }
413
414 if (Subtarget.hasStdExtZbb() ||
415 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
417 Legal);
418 }
419
420 if (Subtarget.hasCTZLike()) {
421 if (Subtarget.is64Bit())
423 } else {
425 }
426
427 if (!Subtarget.hasCPOPLike()) {
428 // TODO: These should be set to LibCall, but this currently breaks
429 // the Linux kernel build. See #101786. Lacks i128 tests, too.
430 if (Subtarget.is64Bit())
432 else
435 }
436
437 if (Subtarget.hasCLZLike()) {
438 // We need the custom lowering to make sure that the resulting sequence
439 // for the 32-bit case is efficient on 64-bit targets.
440 // Use default promotion for i32 without Zbb.
441 if (Subtarget.is64Bit() &&
442 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtP()))
444 } else {
446 }
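  // For example, on RV64 with Zbb an i32 ctlz can be selected directly to
  // clzw; without the custom hook the operation would be promoted to i64 and
  // typically need a full 64-bit clz plus an adjustment of the result.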
447
448 if (Subtarget.hasStdExtP() ||
449 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
451 if (Subtarget.is64Bit())
453 } else if (Subtarget.hasShortForwardBranchIALU()) {
454 // We can use PseudoCCSUB to implement ABS.
456 } else if (Subtarget.is64Bit()) {
458 }
459
460 if (!Subtarget.useMIPSCCMovInsn() && !Subtarget.hasVendorXTHeadCondMov())
462
463 if (Subtarget.hasVendorXqcia() && !Subtarget.is64Bit()) {
470 }
471
472 static const unsigned FPLegalNodeTypes[] = {
480
481 static const ISD::CondCode FPCCToExpand[] = {
485
486 static const unsigned FPOpToExpand[] = {
488 ISD::FREM};
489
490 static const unsigned FPRndMode[] = {
493
494 static const unsigned ZfhminZfbfminPromoteOps[] = {
504
505 if (Subtarget.enablePExtCodeGen()) {
507 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
508 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
510 if (Subtarget.is64Bit()) {
511 VTs.append({MVT::v2i32, MVT::v4i16, MVT::v8i8});
512 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
513 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
514 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
515 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
516 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
517 } else {
518 VTs.append({MVT::v2i16, MVT::v4i8});
520 }
532 }
533
534 if (Subtarget.hasStdExtZfbfmin()) {
540 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
547 }
548
549 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
550 if (Subtarget.hasStdExtZfhOrZhinx()) {
551 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
552 setOperationAction(FPRndMode, MVT::f16,
553 Subtarget.hasStdExtZfa() ? Legal : Custom);
556 Subtarget.hasStdExtZfa() ? Legal : Custom);
557 if (Subtarget.hasStdExtZfa())
559 } else {
560 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
565 setOperationAction(Op, MVT::f16, Custom);
571 }
572
574
577 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
581
583 ISD::FNEARBYINT, MVT::f16,
584 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
589 MVT::f16, Promote);
590
591 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
592 // complete support for all operations in LegalizeDAG.
597 MVT::f16, Promote);
598
599 // We need to custom promote this.
600 if (Subtarget.is64Bit())
602 }
603
604 if (Subtarget.hasStdExtFOrZfinx()) {
605 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
606 setOperationAction(FPRndMode, MVT::f32,
607 Subtarget.hasStdExtZfa() ? Legal : Custom);
608 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
612 setOperationAction(FPOpToExpand, MVT::f32, Expand);
613 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
614 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
615 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
616 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
620 Subtarget.isSoftFPABI() ? LibCall : Custom);
625
626 if (Subtarget.hasStdExtZfa()) {
630 } else {
632 }
633 }
634
635 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
637
638 if (Subtarget.hasStdExtDOrZdinx()) {
639 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
640
641 if (!Subtarget.is64Bit())
643
644 if (Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
645 !Subtarget.is64Bit()) {
648 }
649
650 if (Subtarget.hasStdExtZfa()) {
652 setOperationAction(FPRndMode, MVT::f64, Legal);
655 } else {
656 if (Subtarget.is64Bit())
657 setOperationAction(FPRndMode, MVT::f64, Custom);
658
660 }
661
664 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
668 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
669 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
670 setOperationAction(FPOpToExpand, MVT::f64, Expand);
671 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
672 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
673 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
674 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
678 Subtarget.isSoftFPABI() ? LibCall : Custom);
683 }
684
685 if (Subtarget.is64Bit()) {
688 MVT::i32, Custom);
690 }
691
692 if (Subtarget.hasStdExtFOrZfinx()) {
694 Custom);
695
696 // f16/bf16 require custom handling.
698 Custom);
700 Custom);
701
710 }
711
714 XLenVT, Custom);
715
717
718 if (Subtarget.is64Bit())
720
721 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
722 // Unfortunately this can't be determined just from the ISA naming string.
724 Subtarget.is64Bit() ? Legal : Custom);
726 Subtarget.is64Bit() ? Legal : Custom);
727
728 if (Subtarget.is64Bit()) {
731 }
732
735 if (Subtarget.is64Bit())
737
738 if (Subtarget.hasVendorXMIPSCBOP())
740 else if (Subtarget.hasStdExtZicbop())
742
743 if (Subtarget.hasStdExtZalrsc()) {
744 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
745 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
747 else
749 } else if (Subtarget.hasForcedAtomics()) {
750 setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
751 } else {
753 }
754
756
758
759 if (getTargetMachine().getTargetTriple().isOSLinux()) {
760 // Custom lowering of llvm.clear_cache.
762 }
763
764 if (Subtarget.hasVInstructions()) {
766
768
769 // RVV intrinsics may have illegal operands.
770 // We also need to custom legalize vmv.x.s.
773 {MVT::i8, MVT::i16}, Custom);
774 if (Subtarget.is64Bit())
776 MVT::i32, Custom);
777 else
779 MVT::i64, Custom);
780
782 MVT::Other, Custom);
783
784 static const unsigned IntegerVPOps[] = {
785 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
786 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
787 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
788 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
789 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
790 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
791 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
792 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
793 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
794 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
795 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
796 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
797 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
798 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
799
800 static const unsigned FloatingPointVPOps[] = {
801 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
802 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
803 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
804 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
805 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
806 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
807 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
808 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
809 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
810 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
811 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
812 ISD::VP_LLRINT, ISD::VP_REDUCE_FMINIMUM,
813 ISD::VP_REDUCE_FMAXIMUM};
814
815 static const unsigned IntegerVecReduceOps[] = {
819
820 static const unsigned FloatingPointVecReduceOps[] = {
823
824 static const unsigned FloatingPointLibCallOps[] = {
827
828 if (!Subtarget.is64Bit()) {
829 // We must custom-lower certain vXi64 operations on RV32 due to the vector
830 // element type being illegal.
832 MVT::i64, Custom);
833
834 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
835
836 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
837 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
838 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
839 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
840 MVT::i64, Custom);
841 }
842
843 for (MVT VT : BoolVecVTs) {
844 if (!isTypeLegal(VT))
845 continue;
846
848
849 // Mask VTs are custom-expanded into a series of standard nodes
853 VT, Custom);
854
856 Custom);
857
859 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
860 Expand);
861 setOperationAction(ISD::VP_MERGE, VT, Custom);
862
863 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
864 Custom);
865
866 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
867
870 Custom);
871
873 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
874 Custom);
875
876 // RVV has native int->float & float->int conversions where the
877 // element type sizes are within one power-of-two of each other. Any
878 // wider distances between type sizes have to be lowered as sequences
879 // which progressively narrow the gap in stages.
884 VT, Custom);
886 Custom);
887
888 // Expand all extending loads to types larger than this, and truncating
889 // stores from types larger than this.
891 setTruncStoreAction(VT, OtherVT, Expand);
893 OtherVT, Expand);
894 }
895
896 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
897 ISD::VP_TRUNCATE, ISD::VP_SETCC},
898 VT, Custom);
899
902
904
905 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
906 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
907
910 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
911 }
912
913 for (MVT VT : IntVecVTs) {
914 if (!isTypeLegal(VT))
915 continue;
916
919
920 // Vectors implement MULHS/MULHU.
922
923 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
924 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
926
928 Legal);
929
931
932 // Custom-lower extensions and truncations from/to mask types.
934 VT, Custom);
935
936 // RVV has native int->float & float->int conversions where the
937 // element type sizes are within one power-of-two of each other. Any
938 // wider distances between type sizes have to be lowered as sequences
939 // which progressively narrow the gap in stages.
944 VT, Custom);
946 Custom);
950 VT, Legal);
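  // For example, a sint_to_fp from nxv2i8 to nxv2f64 spans an 8x element-size
  // gap, so it is emitted as a short sequence (e.g. sign-extend the i8
  // elements to a wider integer type, then use the single-step widening
  // convert) rather than as one node; each step closes the gap by at most
  // one power of two.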
951
952 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
953 // nodes which truncate by one power of two at a time.
956 Custom);
957
958 // Custom-lower insert/extract operations to simplify patterns.
960 Custom);
961
962 // Custom-lower reduction operations to set up the corresponding custom
963 // nodes' operands.
964 setOperationAction(IntegerVecReduceOps, VT, Custom);
965
966 setOperationAction(IntegerVPOps, VT, Custom);
967
969
971 VT, Custom);
972
974 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
975 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
976 VT, Custom);
977 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
978
981 VT, Custom);
982
985
987
989 setTruncStoreAction(VT, OtherVT, Expand);
991 OtherVT, Expand);
992 }
993
996
997 // Splice
999
1000 if (Subtarget.hasStdExtZvkb()) {
1002 setOperationAction(ISD::VP_BSWAP, VT, Custom);
1003 } else {
1004 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
1006 }
1007
1008 if (Subtarget.hasStdExtZvbb()) {
1010 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
1011 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
1012 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
1013 VT, Custom);
1014 } else {
1015 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
1017 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
1018 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
1019 VT, Expand);
1020
1021 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1022 // in the range of f32.
1023 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1024 if (isTypeLegal(FloatVT)) {
1026 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
1027 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
1028 VT, Custom);
1029 }
1030 }
1031
1033 }
1034
1035 for (MVT VT : VecTupleVTs) {
1036 if (!isTypeLegal(VT))
1037 continue;
1038
1040 }
1041
1042 // Expand various CCs to best match the RVV ISA, which natively supports UNE
1043 // but no other unordered comparisons, and supports all ordered comparisons
1044 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
1045 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
1046 // and we pattern-match those back to the "original", swapping operands once
1047 // more. This way we catch both operations and both "vf" and "fv" forms with
1048 // fewer patterns.
1049 static const ISD::CondCode VFPCCToExpand[] = {
1053 };
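  // For example, a SETOGT between two vectors becomes SETOLT with the
  // operands swapped, which matches vmflt.vv directly; when one operand is a
  // scalar splat the same comparison can be matched back to vmfgt.vf, so the
  // "vf" and "fv" variants are covered without additional patterns.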
1054
1055 // TODO: support more ops.
1056 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
1061 ISD::FADD,
1062 ISD::FSUB,
1063 ISD::FMUL,
1064 ISD::FMA,
1065 ISD::FDIV,
1066 ISD::FSQRT,
1067 ISD::FCEIL,
1072 ISD::FRINT,
1075 ISD::SETCC,
1088
1089 // TODO: Make more of these ops legal.
1090 static const unsigned ZvfbfaPromoteOps[] = {ISD::FMINNUM,
1094 ISD::FDIV,
1095 ISD::FMA,
1096 ISD::FSQRT,
1097 ISD::FCEIL,
1102 ISD::FRINT,
1105 ISD::SETCC,
1118
1119 // TODO: support more vp ops.
1120 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
1121 ISD::VP_FADD,
1122 ISD::VP_FSUB,
1123 ISD::VP_FMUL,
1124 ISD::VP_FDIV,
1125 ISD::VP_FMA,
1126 ISD::VP_REDUCE_FMIN,
1127 ISD::VP_REDUCE_FMAX,
1128 ISD::VP_SQRT,
1129 ISD::VP_FMINNUM,
1130 ISD::VP_FMAXNUM,
1131 ISD::VP_FCEIL,
1132 ISD::VP_FFLOOR,
1133 ISD::VP_FROUND,
1134 ISD::VP_FROUNDEVEN,
1135 ISD::VP_FROUNDTOZERO,
1136 ISD::VP_FRINT,
1137 ISD::VP_FNEARBYINT,
1138 ISD::VP_SETCC,
1139 ISD::VP_FMINIMUM,
1140 ISD::VP_FMAXIMUM,
1141 ISD::VP_REDUCE_FMINIMUM,
1142 ISD::VP_REDUCE_FMAXIMUM};
1143
1144 // Sets common operation actions on RVV floating-point vector types.
1145 const auto SetCommonVFPActions = [&](MVT VT) {
1147 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
1148 // sizes are within one power-of-two of each other. Therefore conversions
1149 // between vXf16 and vXf64 must be lowered as sequences which convert via
1150 // vXf32.
1154 // Custom-lower insert/extract operations to simplify patterns.
1156 Custom);
1157 // Expand various condition codes (explained above).
1158 setCondCodeAction(VFPCCToExpand, VT, Expand);
1159
1162 Legal);
1164
1168 VT, Custom);
1169
1170 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1171
1172 // Expand FP operations that need libcalls.
1173 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1174
1176
1178
1180 VT, Custom);
1181
1183 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1184 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1185 VT, Custom);
1186 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1187
1190
1193 VT, Custom);
1194
1197
1199 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1200 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1201
1202 setOperationAction(FloatingPointVPOps, VT, Custom);
1203
1205 Custom);
1208 VT, Legal);
1213 VT, Custom);
1214
1216 };
1217
1218 // Sets common extload/truncstore actions on RVV floating-point vector
1219 // types.
1220 const auto SetCommonVFPExtLoadTruncStoreActions =
1221 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1222 for (auto SmallVT : SmallerVTs) {
1223 setTruncStoreAction(VT, SmallVT, Expand);
1224 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1225 }
1226 };
1227
1228 // Sets common actions for f16 and bf16 when only zvfhmin/zvfbfmin is
1229 // available and most operations need to be promoted to f32.
1230 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1233 Custom);
1234 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1237 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1238 Custom);
1240 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1246 VT, Custom);
1247 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1248 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1249 MVT EltVT = VT.getVectorElementType();
1250 if (isTypeLegal(EltVT))
1252 VT, Custom);
1253 else
1256 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1257 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1258 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1259 ISD::VP_SCATTER},
1260 VT, Custom);
1261 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1262
1266
1267 // Expand FP operations that need libcalls.
1268 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1269
1270 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1271 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1272 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1273 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1274 } else {
1275 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1276 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1277 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1278 }
1279 };
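  // As a rough sketch of the promotion path: with only Zvfhmin, an f16 vector
  // fadd is legalized by widening both operands to f32 (vfwcvt.f.f.v), adding
  // in f32, and narrowing the result back to f16 (vfncvt.f.f.w); the LMUL=8
  // types are split first because the corresponding f32 type is not legal.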
1280
1281 // Sets common actions for zvfbfa; some instructions are supported natively,
1282 // so we don't need to promote them.
1283 const auto SetZvfbfaActions = [&](MVT VT) {
1286 Custom);
1287 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1290 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1291 Custom);
1293 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1299 VT, Custom);
1300 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1301 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1302
1306
1308 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1309 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1310 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1311 ISD::VP_SCATTER},
1312 VT, Custom);
1313 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1314
1315 // Expand FP operations that need libcalls.
1316 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1317
1318 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
1319 if (getLMUL(VT) == RISCVVType::LMUL_8) {
1320 setOperationAction(ZvfbfaPromoteOps, VT, Custom);
1321 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1322 } else {
1323 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1324 setOperationPromotedToType(ZvfbfaPromoteOps, VT, F32VecVT);
1325 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1326 }
1327 };
1328
1329 if (Subtarget.hasVInstructionsF16()) {
1330 for (MVT VT : F16VecVTs) {
1331 if (!isTypeLegal(VT))
1332 continue;
1333 SetCommonVFPActions(VT);
1334 }
1335 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1336 for (MVT VT : F16VecVTs) {
1337 if (!isTypeLegal(VT))
1338 continue;
1339 SetCommonPromoteToF32Actions(VT);
1340 }
1341 }
1342
1343 if (Subtarget.hasVInstructionsBF16()) {
1344 for (MVT VT : BF16VecVTs) {
1345 if (!isTypeLegal(VT))
1346 continue;
1347 SetZvfbfaActions(VT);
1348 }
1349 } else if (Subtarget.hasVInstructionsBF16Minimal()) {
1350 for (MVT VT : BF16VecVTs) {
1351 if (!isTypeLegal(VT))
1352 continue;
1353 SetCommonPromoteToF32Actions(VT);
1354 }
1355 }
1356
1357 if (Subtarget.hasVInstructionsF32()) {
1358 for (MVT VT : F32VecVTs) {
1359 if (!isTypeLegal(VT))
1360 continue;
1361 SetCommonVFPActions(VT);
1362 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1363 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1364 }
1365 }
1366
1367 if (Subtarget.hasVInstructionsF64()) {
1368 for (MVT VT : F64VecVTs) {
1369 if (!isTypeLegal(VT))
1370 continue;
1371 SetCommonVFPActions(VT);
1372 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1373 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1374 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1375 }
1376 }
1377
1378 if (Subtarget.useRVVForFixedLengthVectors()) {
1380 if (!useRVVForFixedLengthVectorVT(VT))
1381 continue;
1382
1383 // By default everything must be expanded.
1384 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1387 setTruncStoreAction(VT, OtherVT, Expand);
1389 OtherVT, Expand);
1390 }
1391
1392 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1393 // expansion to a build_vector of 0s.
1395
1396 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1398 Custom);
1399
1402 Custom);
1403
1405 VT, Custom);
1406
1408 VT, Custom);
1409
1411
1413
1415
1417
1420 Custom);
1421
1423
1426 Custom);
1427
1429 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1430 Custom);
1431
1433 {
1442 },
1443 VT, Custom);
1445 Custom);
1446
1448
1449 // Operations below are different between masks and other vectors.
1450 if (VT.getVectorElementType() == MVT::i1) {
1451 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1452 ISD::OR, ISD::XOR},
1453 VT, Custom);
1454
1455 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1456 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1457 VT, Custom);
1458
1459 setOperationAction(ISD::VP_MERGE, VT, Custom);
1460
1461 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1462 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1463 continue;
1464 }
1465
1466 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1467 // it before type legalization for i64 vectors on RV32. It will then be
1468 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1469 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1470 // improvements first.
1471 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1474
1475 // Lower BUILD_VECTOR with i64 type to VID on RV32 if possible.
1477 }
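  // Sketch of why this matters: on RV32 an i64 element cannot live in a
  // single GPR, so a (splat_vector i64 C) survives DAGCombine only until type
  // legalization, which splits the scalar into lo/hi i32 halves and produces
  // SPLAT_VECTOR_PARTS; the Custom handler then lowers that two-operand form.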
1478
1481
1482 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1483 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1484 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1485 ISD::VP_SCATTER},
1486 VT, Custom);
1487 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1488
1492 VT, Custom);
1493
1496
1498
1499 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1500 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1502
1506 VT, Custom);
1507
1509
1512
1513 // Custom-lower reduction operations to set up the corresponding custom
1514 // nodes' operands.
1518 VT, Custom);
1519
1520 setOperationAction(IntegerVPOps, VT, Custom);
1521
1522 if (Subtarget.hasStdExtZvkb())
1524
1525 if (Subtarget.hasStdExtZvbb()) {
1528 VT, Custom);
1529 } else {
1530 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1531 // in the range of f32.
1532 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1533 if (isTypeLegal(FloatVT))
1536 Custom);
1537 }
1538
1540 }
1541
1543 // There are no extending loads or truncating stores.
1544 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1545 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1546 setTruncStoreAction(VT, InnerVT, Expand);
1547 }
1548
1549 if (!useRVVForFixedLengthVectorVT(VT))
1550 continue;
1551
1552 // By default everything must be expanded.
1553 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1555
1556 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1557 // expansion to a build_vector of 0s.
1559
1564 VT, Custom);
1565 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1566 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1567
1569 VT, Custom);
1570
1573 VT, Custom);
1574 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1575 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1576 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1577 VT, Custom);
1578 setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
1579
1582 Custom);
1583
1584 if (VT.getVectorElementType() == MVT::f16 &&
1585 !Subtarget.hasVInstructionsF16()) {
1587 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1589 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1590 Custom);
1591 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1592 Custom);
1595 if (Subtarget.hasStdExtZfhmin()) {
1597 } else {
1598 // We need to custom legalize f16 build vectors if Zfhmin isn't
1599 // available.
1601 }
1605 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1606 // Don't promote f16 vector operations to f32 if the f32 vector type is
1607 // not legal.
1608 // TODO: could split the f16 vector into two vectors and do promotion.
1609 if (!isTypeLegal(F32VecVT))
1610 continue;
1611 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1612 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1613 continue;
1614 }
1615
1616 if (VT.getVectorElementType() == MVT::bf16) {
1618 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1621 if (Subtarget.hasStdExtZfbfmin()) {
1623 } else {
1624 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1625 // available.
1627 }
1628 if (Subtarget.hasStdExtZvfbfa()) {
1631 }
1633 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1634 Custom);
1635 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1636 // Don't promote bf16 vector operations to f32 if the f32 vector type is
1637 // not legal.
1638 // TODO: could split the bf16 vector into two vectors and do promotion.
1639 if (!isTypeLegal(F32VecVT))
1640 continue;
1641
1642 if (Subtarget.hasStdExtZvfbfa())
1643 setOperationPromotedToType(ZvfbfaPromoteOps, VT, F32VecVT);
1644 else
1645 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1646 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1647 continue;
1648 }
1649
1651 Custom);
1652
1658 VT, Custom);
1659
1664 VT, Custom);
1665
1666 setCondCodeAction(VFPCCToExpand, VT, Expand);
1667
1670
1672
1673 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1674
1675 setOperationAction(FloatingPointVPOps, VT, Custom);
1676
1683 VT, Custom);
1684 }
1685
1686 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1687 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1688 if (Subtarget.is64Bit())
1690 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1692 if (Subtarget.hasStdExtZfbfmin())
1694 if (Subtarget.hasStdExtFOrZfinx())
1696 if (Subtarget.hasStdExtDOrZdinx())
1698 }
1699 }
1700
1701 if (Subtarget.hasStdExtZaamo())
1703
1704 if (Subtarget.hasForcedAtomics()) {
1705 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1711 XLenVT, LibCall);
1712 }
1713
1714 if (Subtarget.hasVendorXTHeadMemIdx()) {
1715 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1716 setIndexedLoadAction(im, MVT::i8, Legal);
1717 setIndexedStoreAction(im, MVT::i8, Legal);
1718 setIndexedLoadAction(im, MVT::i16, Legal);
1719 setIndexedStoreAction(im, MVT::i16, Legal);
1720 setIndexedLoadAction(im, MVT::i32, Legal);
1721 setIndexedStoreAction(im, MVT::i32, Legal);
1722
1723 if (Subtarget.is64Bit()) {
1724 setIndexedLoadAction(im, MVT::i64, Legal);
1725 setIndexedStoreAction(im, MVT::i64, Legal);
1726 }
1727 }
1728 }
1729
1730 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1734
1738 }
1739
1740 // zve32x is broken for partial_reduce_umla, but let's not make it worse.
1741 if (Subtarget.hasStdExtZvqdotq() && Subtarget.getELen() >= 64) {
1742 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
1745 setPartialReduceMLAAction(MLAOps, MVT::nxv1i32, MVT::nxv4i8, Custom);
1746 setPartialReduceMLAAction(MLAOps, MVT::nxv2i32, MVT::nxv8i8, Custom);
1747 setPartialReduceMLAAction(MLAOps, MVT::nxv4i32, MVT::nxv16i8, Custom);
1748 setPartialReduceMLAAction(MLAOps, MVT::nxv8i32, MVT::nxv32i8, Custom);
1749 setPartialReduceMLAAction(MLAOps, MVT::nxv16i32, MVT::nxv64i8, Custom);
1750
1751 if (Subtarget.useRVVForFixedLengthVectors()) {
1753 if (VT.getVectorElementType() != MVT::i32 ||
1754 !useRVVForFixedLengthVectorVT(VT))
1755 continue;
1756 ElementCount EC = VT.getVectorElementCount();
1757 MVT ArgVT = MVT::getVectorVT(MVT::i8, EC.multiplyCoefficientBy(4));
1758 setPartialReduceMLAAction(MLAOps, VT, ArgVT, Custom);
1759 }
1760 }
1761 }
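  // For example, a partial_reduce_smla that accumulates products of nxv16i8
  // inputs into an nxv4i32 accumulator (a 4:1 element ratio) can be selected
  // to a single vqdot-style instruction; the fixed-length loop above arranges
  // the same 4x-wider i8 argument type for each legal fixed i32 vector.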
1762
1763 // Customize load and store operations for bf16 if Zfh isn't enabled.
1764 if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1765 setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1767 }
1768
1769 // Function alignments.
1770 const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
1771 setMinFunctionAlignment(FunctionAlignment);
1772 // Set preferred alignments.
1773 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
1774 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
1775
1781
1782 if (Subtarget.hasStdExtFOrZfinx())
1784
1785 if (Subtarget.hasStdExtZbb())
1787
1788 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1789 Subtarget.hasVInstructions())
1791
1792 if (Subtarget.hasStdExtZbkb())
1794
1795 if (Subtarget.hasStdExtFOrZfinx())
1798 if (Subtarget.hasVInstructions())
1801 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA,
1804 ISD::VP_STORE, ISD::VP_TRUNCATE, ISD::EXPERIMENTAL_VP_REVERSE,
1809
1810 if (Subtarget.hasVendorXTHeadMemPair())
1812 if (Subtarget.useRVVForFixedLengthVectors())
1814
1815 setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
1816
1817 // Disable strict node mutation.
1818 IsStrictFPEnabled = true;
1819 EnableExtLdPromotion = true;
1820
1821 // Let the subtarget decide if a predictable select is more expensive than the
1822 // corresponding branch. This information is used in CGP/SelectOpt to decide
1823 // when to convert selects into branches.
1824 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1825
1826 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1827 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1828
1829 MaxGluedStoresPerMemcpy = Subtarget.getMaxGluedStoresPerMemcpy();
1830 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1831 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1832
1834 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1835 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1836
1837 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1838 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1839}
1840
1843 if (Subtarget.is64Bit() && Subtarget.enablePExtCodeGen())
1844 if (VT == MVT::v2i16 || VT == MVT::v4i8)
1845 return TypeWidenVector;
1846
1848}
1849
1851 LLVMContext &Context,
1852 EVT VT) const {
1853 if (!VT.isVector())
1854 return getPointerTy(DL);
1855 if (Subtarget.hasVInstructions() &&
1856 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1857 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1859}
1860
1862 return Subtarget.getXLenVT();
1863}
1864
1865// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1866bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1867 unsigned VF,
1868 bool IsScalable) const {
1869 if (!Subtarget.hasVInstructions())
1870 return true;
1871
1872 if (!IsScalable)
1873 return true;
1874
1875 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1876 return true;
1877
1878 // Don't allow VF=1 if those types aren't legal.
1879 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1880 return true;
1881
1882 // VLEN=32 support is incomplete.
1883 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1884 return true;
1885
1886 // The maximum VF is for the smallest element width with LMUL=8.
1887 // VF must be a power of 2.
1888 unsigned MaxVF = RISCV::RVVBytesPerBlock * 8;
1889 return VF > MaxVF || !isPowerOf2_32(VF);
1890}
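// Worked example (assuming the standard V extension, ELen = 64): MaxVF is
// RVVBytesPerBlock * 8 = 64, so a call such as
//   llvm.experimental.get.vector.length.i32(i32 %n, i32 16, i1 true)
// returns false here and can be lowered to a vsetvli, whereas VF = 3 (not a
// power of two) or VF = 128 (larger than MaxVF) returns true and is expanded
// generically.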
1891
1893 return !Subtarget.hasVInstructions() ||
1894 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1895}
1896
1898 const CallBase &I,
1899 MachineFunction &MF,
1900 unsigned Intrinsic) const {
1901 auto &DL = I.getDataLayout();
1902
1903 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1904 bool IsUnitStrided, bool UsePtrVal = false) {
1905 Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
1906 // We can't use ptrVal if the intrinsic can access memory before the
1907 // pointer. This means we can't use it for strided or indexed intrinsics.
1908 if (UsePtrVal)
1909 Info.ptrVal = I.getArgOperand(PtrOp);
1910 else
1911 Info.fallbackAddressSpace =
1912 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1913 Type *MemTy;
1914 if (IsStore) {
1915 // Store value is the first operand.
1916 MemTy = I.getArgOperand(0)->getType();
1917 } else {
1918 // Use the return type. If it's a segment load, the return type is a struct.
1919 MemTy = I.getType();
1920 if (MemTy->isStructTy())
1921 MemTy = MemTy->getStructElementType(0);
1922 }
1923 if (!IsUnitStrided)
1924 MemTy = MemTy->getScalarType();
1925
1926 Info.memVT = getValueType(DL, MemTy);
1927 if (MemTy->isTargetExtTy()) {
1928 // RISC-V vector tuple type's alignment type should be its element type.
1929 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1930 MemTy = Type::getIntNTy(
1931 MemTy->getContext(),
1932 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1933 ->getZExtValue());
1934 Info.align = DL.getABITypeAlign(MemTy);
1935 } else {
1936 Info.align = Align(DL.getTypeStoreSize(MemTy->getScalarType()));
1937 }
1938 Info.size = MemoryLocation::UnknownSize;
1939 Info.flags |=
1941 return true;
1942 };
1943
1944 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1946
1948 switch (Intrinsic) {
1949 default:
1950 return false;
1951 case Intrinsic::riscv_masked_atomicrmw_xchg:
1952 case Intrinsic::riscv_masked_atomicrmw_add:
1953 case Intrinsic::riscv_masked_atomicrmw_sub:
1954 case Intrinsic::riscv_masked_atomicrmw_nand:
1955 case Intrinsic::riscv_masked_atomicrmw_max:
1956 case Intrinsic::riscv_masked_atomicrmw_min:
1957 case Intrinsic::riscv_masked_atomicrmw_umax:
1958 case Intrinsic::riscv_masked_atomicrmw_umin:
1959 case Intrinsic::riscv_masked_cmpxchg:
1960 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
1961 // narrow atomic operation. These will be expanded to an LR/SC loop that
1962 // reads/writes to/from an aligned 4-byte location. And, or, shift, etc.
1963 // will be used to modify the appropriate part of the 4-byte data and
1964 // preserve the rest.
1965 Info.opc = ISD::INTRINSIC_W_CHAIN;
1966 Info.memVT = MVT::i32;
1967 Info.ptrVal = I.getArgOperand(0);
1968 Info.offset = 0;
1969 Info.align = Align(4);
1972 return true;
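    // Rough shape of the expansion (pseudo-assembly; details vary by
    // operation):
    //   retry:
    //     lr.w t0, (a0)        # load the aligned 32-bit word
    //     <merge the new value into t0 under the mask>
    //     sc.w t1, t0, (a0)    # try to store it back
    //     bnez t1, retry       # retry if the reservation was lost
    // Only the masked byte/halfword changes; the rest of the word is kept.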
1973 case Intrinsic::riscv_seg2_load_mask:
1974 case Intrinsic::riscv_seg3_load_mask:
1975 case Intrinsic::riscv_seg4_load_mask:
1976 case Intrinsic::riscv_seg5_load_mask:
1977 case Intrinsic::riscv_seg6_load_mask:
1978 case Intrinsic::riscv_seg7_load_mask:
1979 case Intrinsic::riscv_seg8_load_mask:
1980 case Intrinsic::riscv_sseg2_load_mask:
1981 case Intrinsic::riscv_sseg3_load_mask:
1982 case Intrinsic::riscv_sseg4_load_mask:
1983 case Intrinsic::riscv_sseg5_load_mask:
1984 case Intrinsic::riscv_sseg6_load_mask:
1985 case Intrinsic::riscv_sseg7_load_mask:
1986 case Intrinsic::riscv_sseg8_load_mask:
1987 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1988 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1989 case Intrinsic::riscv_seg2_store_mask:
1990 case Intrinsic::riscv_seg3_store_mask:
1991 case Intrinsic::riscv_seg4_store_mask:
1992 case Intrinsic::riscv_seg5_store_mask:
1993 case Intrinsic::riscv_seg6_store_mask:
1994 case Intrinsic::riscv_seg7_store_mask:
1995 case Intrinsic::riscv_seg8_store_mask:
1996 // Operands are (vec, ..., vec, ptr, mask, vl)
1997 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1998 /*IsStore*/ true,
1999 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2000 case Intrinsic::riscv_sseg2_store_mask:
2001 case Intrinsic::riscv_sseg3_store_mask:
2002 case Intrinsic::riscv_sseg4_store_mask:
2003 case Intrinsic::riscv_sseg5_store_mask:
2004 case Intrinsic::riscv_sseg6_store_mask:
2005 case Intrinsic::riscv_sseg7_store_mask:
2006 case Intrinsic::riscv_sseg8_store_mask:
2007 // Operands are (vec, ..., vec, ptr, offset, mask, vl)
2008 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2009 /*IsStore*/ true,
2010 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2011 case Intrinsic::riscv_vlm:
2012 return SetRVVLoadStoreInfo(/*PtrOp*/ 0,
2013 /*IsStore*/ false,
2014 /*IsUnitStrided*/ true,
2015 /*UsePtrVal*/ true);
2016 case Intrinsic::riscv_vle:
2017 case Intrinsic::riscv_vle_mask:
2018 case Intrinsic::riscv_vleff:
2019 case Intrinsic::riscv_vleff_mask:
2020 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2021 /*IsStore*/ false,
2022 /*IsUnitStrided*/ true,
2023 /*UsePtrVal*/ true);
2024 case Intrinsic::riscv_vsm:
2025 case Intrinsic::riscv_vse:
2026 case Intrinsic::riscv_vse_mask:
2027 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2028 /*IsStore*/ true,
2029 /*IsUnitStrided*/ true,
2030 /*UsePtrVal*/ true);
2031 case Intrinsic::riscv_vlse:
2032 case Intrinsic::riscv_vlse_mask:
2033 case Intrinsic::riscv_vloxei:
2034 case Intrinsic::riscv_vloxei_mask:
2035 case Intrinsic::riscv_vluxei:
2036 case Intrinsic::riscv_vluxei_mask:
2037 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2038 /*IsStore*/ false,
2039 /*IsUnitStrided*/ false);
2040 case Intrinsic::riscv_vsse:
2041 case Intrinsic::riscv_vsse_mask:
2042 case Intrinsic::riscv_vsoxei:
2043 case Intrinsic::riscv_vsoxei_mask:
2044 case Intrinsic::riscv_vsuxei:
2045 case Intrinsic::riscv_vsuxei_mask:
2046 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
2047 /*IsStore*/ true,
2048 /*IsUnitStrided*/ false);
2049 case Intrinsic::riscv_vlseg2:
2050 case Intrinsic::riscv_vlseg3:
2051 case Intrinsic::riscv_vlseg4:
2052 case Intrinsic::riscv_vlseg5:
2053 case Intrinsic::riscv_vlseg6:
2054 case Intrinsic::riscv_vlseg7:
2055 case Intrinsic::riscv_vlseg8:
2056 case Intrinsic::riscv_vlseg2ff:
2057 case Intrinsic::riscv_vlseg3ff:
2058 case Intrinsic::riscv_vlseg4ff:
2059 case Intrinsic::riscv_vlseg5ff:
2060 case Intrinsic::riscv_vlseg6ff:
2061 case Intrinsic::riscv_vlseg7ff:
2062 case Intrinsic::riscv_vlseg8ff:
2063 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
2064 /*IsStore*/ false,
2065 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2066 case Intrinsic::riscv_vlseg2_mask:
2067 case Intrinsic::riscv_vlseg3_mask:
2068 case Intrinsic::riscv_vlseg4_mask:
2069 case Intrinsic::riscv_vlseg5_mask:
2070 case Intrinsic::riscv_vlseg6_mask:
2071 case Intrinsic::riscv_vlseg7_mask:
2072 case Intrinsic::riscv_vlseg8_mask:
2073 case Intrinsic::riscv_vlseg2ff_mask:
2074 case Intrinsic::riscv_vlseg3ff_mask:
2075 case Intrinsic::riscv_vlseg4ff_mask:
2076 case Intrinsic::riscv_vlseg5ff_mask:
2077 case Intrinsic::riscv_vlseg6ff_mask:
2078 case Intrinsic::riscv_vlseg7ff_mask:
2079 case Intrinsic::riscv_vlseg8ff_mask:
2080 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2081 /*IsStore*/ false,
2082 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
2083 case Intrinsic::riscv_vlsseg2:
2084 case Intrinsic::riscv_vlsseg3:
2085 case Intrinsic::riscv_vlsseg4:
2086 case Intrinsic::riscv_vlsseg5:
2087 case Intrinsic::riscv_vlsseg6:
2088 case Intrinsic::riscv_vlsseg7:
2089 case Intrinsic::riscv_vlsseg8:
2090 case Intrinsic::riscv_vloxseg2:
2091 case Intrinsic::riscv_vloxseg3:
2092 case Intrinsic::riscv_vloxseg4:
2093 case Intrinsic::riscv_vloxseg5:
2094 case Intrinsic::riscv_vloxseg6:
2095 case Intrinsic::riscv_vloxseg7:
2096 case Intrinsic::riscv_vloxseg8:
2097 case Intrinsic::riscv_vluxseg2:
2098 case Intrinsic::riscv_vluxseg3:
2099 case Intrinsic::riscv_vluxseg4:
2100 case Intrinsic::riscv_vluxseg5:
2101 case Intrinsic::riscv_vluxseg6:
2102 case Intrinsic::riscv_vluxseg7:
2103 case Intrinsic::riscv_vluxseg8:
2104 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2105 /*IsStore*/ false,
2106 /*IsUnitStrided*/ false);
2107 case Intrinsic::riscv_vlsseg2_mask:
2108 case Intrinsic::riscv_vlsseg3_mask:
2109 case Intrinsic::riscv_vlsseg4_mask:
2110 case Intrinsic::riscv_vlsseg5_mask:
2111 case Intrinsic::riscv_vlsseg6_mask:
2112 case Intrinsic::riscv_vlsseg7_mask:
2113 case Intrinsic::riscv_vlsseg8_mask:
2114 case Intrinsic::riscv_vloxseg2_mask:
2115 case Intrinsic::riscv_vloxseg3_mask:
2116 case Intrinsic::riscv_vloxseg4_mask:
2117 case Intrinsic::riscv_vloxseg5_mask:
2118 case Intrinsic::riscv_vloxseg6_mask:
2119 case Intrinsic::riscv_vloxseg7_mask:
2120 case Intrinsic::riscv_vloxseg8_mask:
2121 case Intrinsic::riscv_vluxseg2_mask:
2122 case Intrinsic::riscv_vluxseg3_mask:
2123 case Intrinsic::riscv_vluxseg4_mask:
2124 case Intrinsic::riscv_vluxseg5_mask:
2125 case Intrinsic::riscv_vluxseg6_mask:
2126 case Intrinsic::riscv_vluxseg7_mask:
2127 case Intrinsic::riscv_vluxseg8_mask:
2128 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
2129 /*IsStore*/ false,
2130 /*IsUnitStrided*/ false);
2131 case Intrinsic::riscv_vsseg2:
2132 case Intrinsic::riscv_vsseg3:
2133 case Intrinsic::riscv_vsseg4:
2134 case Intrinsic::riscv_vsseg5:
2135 case Intrinsic::riscv_vsseg6:
2136 case Intrinsic::riscv_vsseg7:
2137 case Intrinsic::riscv_vsseg8:
2138 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
2139 /*IsStore*/ true,
2140 /*IsUnitStrided*/ false);
2141 case Intrinsic::riscv_vsseg2_mask:
2142 case Intrinsic::riscv_vsseg3_mask:
2143 case Intrinsic::riscv_vsseg4_mask:
2144 case Intrinsic::riscv_vsseg5_mask:
2145 case Intrinsic::riscv_vsseg6_mask:
2146 case Intrinsic::riscv_vsseg7_mask:
2147 case Intrinsic::riscv_vsseg8_mask:
2148 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2149 /*IsStore*/ true,
2150 /*IsUnitStrided*/ false);
2151 case Intrinsic::riscv_vssseg2:
2152 case Intrinsic::riscv_vssseg3:
2153 case Intrinsic::riscv_vssseg4:
2154 case Intrinsic::riscv_vssseg5:
2155 case Intrinsic::riscv_vssseg6:
2156 case Intrinsic::riscv_vssseg7:
2157 case Intrinsic::riscv_vssseg8:
2158 case Intrinsic::riscv_vsoxseg2:
2159 case Intrinsic::riscv_vsoxseg3:
2160 case Intrinsic::riscv_vsoxseg4:
2161 case Intrinsic::riscv_vsoxseg5:
2162 case Intrinsic::riscv_vsoxseg6:
2163 case Intrinsic::riscv_vsoxseg7:
2164 case Intrinsic::riscv_vsoxseg8:
2165 case Intrinsic::riscv_vsuxseg2:
2166 case Intrinsic::riscv_vsuxseg3:
2167 case Intrinsic::riscv_vsuxseg4:
2168 case Intrinsic::riscv_vsuxseg5:
2169 case Intrinsic::riscv_vsuxseg6:
2170 case Intrinsic::riscv_vsuxseg7:
2171 case Intrinsic::riscv_vsuxseg8:
2172 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
2173 /*IsStore*/ true,
2174 /*IsUnitStrided*/ false);
2175 case Intrinsic::riscv_vssseg2_mask:
2176 case Intrinsic::riscv_vssseg3_mask:
2177 case Intrinsic::riscv_vssseg4_mask:
2178 case Intrinsic::riscv_vssseg5_mask:
2179 case Intrinsic::riscv_vssseg6_mask:
2180 case Intrinsic::riscv_vssseg7_mask:
2181 case Intrinsic::riscv_vssseg8_mask:
2182 case Intrinsic::riscv_vsoxseg2_mask:
2183 case Intrinsic::riscv_vsoxseg3_mask:
2184 case Intrinsic::riscv_vsoxseg4_mask:
2185 case Intrinsic::riscv_vsoxseg5_mask:
2186 case Intrinsic::riscv_vsoxseg6_mask:
2187 case Intrinsic::riscv_vsoxseg7_mask:
2188 case Intrinsic::riscv_vsoxseg8_mask:
2189 case Intrinsic::riscv_vsuxseg2_mask:
2190 case Intrinsic::riscv_vsuxseg3_mask:
2191 case Intrinsic::riscv_vsuxseg4_mask:
2192 case Intrinsic::riscv_vsuxseg5_mask:
2193 case Intrinsic::riscv_vsuxseg6_mask:
2194 case Intrinsic::riscv_vsuxseg7_mask:
2195 case Intrinsic::riscv_vsuxseg8_mask:
2196 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
2197 /*IsStore*/ true,
2198 /*IsUnitStrided*/ false);
2199 case Intrinsic::riscv_sf_vlte8:
2200 case Intrinsic::riscv_sf_vlte16:
2201 case Intrinsic::riscv_sf_vlte32:
2202 case Intrinsic::riscv_sf_vlte64:
2203 Info.opc = ISD::INTRINSIC_VOID;
2204 Info.ptrVal = I.getArgOperand(1);
2205 switch (Intrinsic) {
2206 case Intrinsic::riscv_sf_vlte8:
2207 Info.memVT = MVT::i8;
2208 Info.align = Align(1);
2209 break;
2210 case Intrinsic::riscv_sf_vlte16:
2211 Info.memVT = MVT::i16;
2212 Info.align = Align(2);
2213 break;
2214 case Intrinsic::riscv_sf_vlte32:
2215 Info.memVT = MVT::i32;
2216 Info.align = Align(4);
2217 break;
2218 case Intrinsic::riscv_sf_vlte64:
2219 Info.memVT = MVT::i64;
2220 Info.align = Align(8);
2221 break;
2222 }
2223 Info.size = MemoryLocation::UnknownSize;
2224 Info.flags |= MachineMemOperand::MOLoad;
2225 return true;
2226 case Intrinsic::riscv_sf_vste8:
2227 case Intrinsic::riscv_sf_vste16:
2228 case Intrinsic::riscv_sf_vste32:
2229 case Intrinsic::riscv_sf_vste64:
2230 Info.opc = ISD::INTRINSIC_VOID;
2231 Info.ptrVal = I.getArgOperand(1);
2232 switch (Intrinsic) {
2233 case Intrinsic::riscv_sf_vste8:
2234 Info.memVT = MVT::i8;
2235 Info.align = Align(1);
2236 break;
2237 case Intrinsic::riscv_sf_vste16:
2238 Info.memVT = MVT::i16;
2239 Info.align = Align(2);
2240 break;
2241 case Intrinsic::riscv_sf_vste32:
2242 Info.memVT = MVT::i32;
2243 Info.align = Align(4);
2244 break;
2245 case Intrinsic::riscv_sf_vste64:
2246 Info.memVT = MVT::i64;
2247 Info.align = Align(8);
2248 break;
2249 }
2250 Info.size = MemoryLocation::UnknownSize;
2251 Info.flags |= MachineMemOperand::MOStore;
2252 return true;
2253 }
2254}
2255
2257 const AddrMode &AM, Type *Ty,
2258 unsigned AS,
2259 Instruction *I) const {
2260 // No global is ever allowed as a base.
2261 if (AM.BaseGV)
2262 return false;
2263
2264 // None of our addressing modes allows a scalable offset
2265 if (AM.ScalableOffset)
2266 return false;
2267
2268 // RVV instructions only support register addressing.
2269 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
2270 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
2271
2272 // Require a 12-bit signed offset.
2273 if (!isInt<12>(AM.BaseOffs))
2274 return false;
2275
2276 switch (AM.Scale) {
2277 case 0: // "r+i" or just "i", depending on HasBaseReg.
2278 break;
2279 case 1:
2280 if (!AM.HasBaseReg) // allow "r+i".
2281 break;
2282 return false; // disallow "r+r" or "r+r+i".
2283 default:
2284 return false;
2285 }
2286
2287 return true;
2288}
2289
2291 return isInt<12>(Imm);
2292}
2293
2295 return isInt<12>(Imm);
2296}
2297
2298// On RV32, 64-bit integers are split into their high and low parts and held
2299// in two different registers, so the trunc is free since the low register can
2300// just be used.
2301// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
2302// isTruncateFree?
2304 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
2305 return false;
2306 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
2307 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
2308 return (SrcBits == 64 && DestBits == 32);
2309}
2310
2312 // We consider i64->i32 free on RV64 since we have good selection of W
2313 // instructions that make promoting operations back to i64 free in many cases.
2314 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
2315 !DstVT.isInteger())
2316 return false;
2317 unsigned SrcBits = SrcVT.getSizeInBits();
2318 unsigned DestBits = DstVT.getSizeInBits();
2319 return (SrcBits == 64 && DestBits == 32);
2320}
2321
2323 EVT SrcVT = Val.getValueType();
2324 // Free truncate from vnsrl and vnsra.
2325 if (Subtarget.hasVInstructions() &&
2326 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
2327 SrcVT.isVector() && VT2.isVector()) {
2328 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
2329 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
2330 if (SrcBits == DestBits * 2) {
2331 return true;
2332 }
2333 }
2334 return TargetLowering::isTruncateFree(Val, VT2);
2335}
2336
2338 // Zexts are free if they can be combined with a load.
2339 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
2340 // poorly with type legalization of compares preferring sext.
2341 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
2342 EVT MemVT = LD->getMemoryVT();
2343 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
2344 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
2345 LD->getExtensionType() == ISD::ZEXTLOAD))
2346 return true;
2347 }
2348
2349 return TargetLowering::isZExtFree(Val, VT2);
2350}
2351
2353 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
2354}
2355
2357 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
2358}
2359
2361 return Subtarget.hasCTZLike();
2362}
2363
2365 return Subtarget.hasCLZLike();
2366}
2367
2369 const Instruction &AndI) const {
2370 // We expect to be able to match a bit extraction instruction if the Zbs
2371 // extension is supported and the mask is a power of two. However, we
2372 // conservatively return false if the mask would fit in an ANDI instruction,
2373 // on the basis that it's possible the sinking+duplication of the AND in
2374 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2375 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2376 if (!Subtarget.hasBEXTILike())
2377 return false;
2379 if (!Mask)
2380 return false;
2381 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2382}
2383
2385 EVT VT = Y.getValueType();
2386
2387 if (VT.isVector())
2388 return false;
2389
2390 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2391 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2392}
2393
2395 EVT VT = Y.getValueType();
2396
2397 if (!VT.isVector())
2398 return hasAndNotCompare(Y);
2399
2400 return Subtarget.hasStdExtZvkb();
2401}
2402
2404 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2405 if (Subtarget.hasStdExtZbs())
2406 return X.getValueType().isScalarInteger();
2407 auto *C = dyn_cast<ConstantSDNode>(Y);
2408 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2409 if (Subtarget.hasVendorXTHeadBs())
2410 return C != nullptr;
2411 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
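// Limiting the bit position to 10 keeps the implied mask (1 << Y) within a
// 12-bit signed immediate, so ANDI can encode it; 1 << 11 == 2048 would not
// fit.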
2412 return C && C->getAPIntValue().ule(10);
2413}
2414
2416 unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
2417 SDValue Y) const {
2418 if (SelectOpcode != ISD::VSELECT)
2419 return false;
2420
2421 // Only enable for RVV.
2422 if (!VT.isVector() || !Subtarget.hasVInstructions())
2423 return false;
2424
2425 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2426 return false;
2427
2428 return true;
2429}
2430
2432 Type *Ty) const {
2433 assert(Ty->isIntegerTy());
2434
2435 unsigned BitSize = Ty->getIntegerBitWidth();
2436 if (BitSize > Subtarget.getXLen())
2437 return false;
2438
2439 // Fast path, assume 32-bit immediates are cheap.
2440 int64_t Val = Imm.getSExtValue();
2441 if (isInt<32>(Val))
2442 return true;
2443
2444 // A constant pool entry may be more aligned than the load we're trying to
2445 // replace. If we don't support unaligned scalar mem, prefer the constant
2446 // pool.
2447 // TODO: Can the caller pass down the alignment?
2448 if (!Subtarget.enableUnalignedScalarMem())
2449 return true;
2450
2451 // Prefer to keep the load if it would require many instructions.
2452 // This uses the same threshold we use for constant pools but doesn't
2453 // check useConstantPoolForLargeInts.
2454 // TODO: Should we keep the load only when we're definitely going to emit a
2455 // constant pool?
2456
2458 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2459}
2460
2464 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2465 SelectionDAG &DAG) const {
2466 // One interesting pattern that we'd want to form is 'bit extract':
2467 // ((1 >> Y) & 1) ==/!= 0
2468 // But we also need to be careful not to try to reverse that fold.
2469
2470 // Is this '((1 >> Y) & 1)'?
2471 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2472 return false; // Keep the 'bit extract' pattern.
2473
2474 // Will this be '((1 >> Y) & 1)' after the transform?
2475 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2476 return true; // Do form the 'bit extract' pattern.
2477
2478 // If 'X' is a constant, and we transform, then we will immediately
2479 // try to undo the fold, thus causing endless combine loop.
2480 // So only do the transform if X is not a constant. This matches the default
2481 // implementation of this function.
2482 return !XC;
2483}
2484
2486 unsigned Opc = VecOp.getOpcode();
2487
2488 // Assume target opcodes can't be scalarized.
2489 // TODO - do we have any exceptions?
2490 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2491 return false;
2492
2493 // If the vector op is not supported, try to convert to scalar.
2494 EVT VecVT = VecOp.getValueType();
2496 return true;
2497
2498 // If the vector op is supported, but the scalar op is not, the transform may
2499 // not be worthwhile.
2500 // Permit a vector binary operation to be converted to a scalar binary
2501 // operation which is custom lowered with an illegal type.
2502 EVT ScalarVT = VecVT.getScalarType();
2503 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2504 isOperationCustom(Opc, ScalarVT);
2505}
2506
2508 const GlobalAddressSDNode *GA) const {
2509 // In order to maximise the opportunity for common subexpression elimination,
2510 // keep a separate ADD node for the global address offset instead of folding
2511 // it in the global address node. Later peephole optimisations may choose to
2512 // fold it back in when profitable.
2513 return false;
2514}
2515
2516// Returns 0-31 if the fli instruction is available for the type and this is
2517// a legal FP immediate for the type. Returns -1 otherwise.
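// The fli.{h,s,d} instruction materializes one of 32 predefined constants
// (for example -1.0, 1.0, small powers of two, infinity and the canonical
// NaN); the returned value is the index of the matching table entry.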
2519 if (!Subtarget.hasStdExtZfa())
2520 return -1;
2521
2522 bool IsSupportedVT = false;
2523 if (VT == MVT::f16) {
2524 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2525 } else if (VT == MVT::f32) {
2526 IsSupportedVT = true;
2527 } else if (VT == MVT::f64) {
2528 assert(Subtarget.hasStdExtD() && "Expect D extension");
2529 IsSupportedVT = true;
2530 }
2531
2532 if (!IsSupportedVT)
2533 return -1;
2534
2535 return RISCVLoadFPImm::getLoadFPImm(Imm);
2536}
2537
2539 bool ForCodeSize) const {
2540 bool IsLegalVT = false;
2541 if (VT == MVT::f16)
2542 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2543 else if (VT == MVT::f32)
2544 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2545 else if (VT == MVT::f64)
2546 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2547 else if (VT == MVT::bf16)
2548 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2549
2550 if (!IsLegalVT)
2551 return false;
2552
2553 if (getLegalZfaFPImm(Imm, VT) >= 0)
2554 return true;
2555
2556 // Some constants can be produced by fli+fneg.
2557 if (Imm.isNegative() && getLegalZfaFPImm(-Imm, VT) >= 0)
2558 return true;
2559
2560 // Cannot create a 64-bit floating-point immediate value for RV32.
2561 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2562 // td can handle +0.0 or -0.0 already.
2563 // -0.0 can be created by fmv + fneg.
2564 return Imm.isZero();
2565 }
2566
2567 // Special case: fmv + fneg
2568 if (Imm.isNegZero())
2569 return true;
2570
2571 // Building an integer and then converting requires a fmv at the end of
2572 // the integer sequence. The fmv is not required for Zfinx.
2573 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2574 const int Cost =
2575 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2576 Subtarget.getXLen(), Subtarget);
2577 return Cost <= FPImmCost;
2578}
2579
2580// TODO: This is very conservative.
2582 unsigned Index) const {
2584 return false;
2585
2586 // Extracts from index 0 are just subreg extracts.
2587 if (Index == 0)
2588 return true;
2589
2590 // Only support extracting a fixed from a fixed vector for now.
2591 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2592 return false;
2593
2594 EVT EltVT = ResVT.getVectorElementType();
2595 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2596
2597 // The smallest type we can slide is i8.
2598 // TODO: We can extract index 0 from a mask vector without a slide.
2599 if (EltVT == MVT::i1)
2600 return false;
2601
2602 unsigned ResElts = ResVT.getVectorNumElements();
2603 unsigned SrcElts = SrcVT.getVectorNumElements();
2604
2605 unsigned MinVLen = Subtarget.getRealMinVLen();
2606 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2607
2608 // If we're extracting only data from the first VLEN bits of the source
2609 // then we can always do this with an m1 vslidedown.vx. Restricting the
2610 // Index ensures we can use a vslidedown.vi.
2611 // TODO: We can generalize this when the exact VLEN is known.
2612 if (Index + ResElts <= MinVLMAX && Index < 31)
2613 return true;
2614
2615 // Conservatively only handle extracting half of a vector.
2616 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2617 // the upper half of a vector until we have more test coverage.
2618 // TODO: For sizes which aren't multiples of VLEN, this may not be
2619 // a cheap extract. However, this case is important in practice for
2620 // shuffled extracts of longer vectors. How should we resolve this?
2621 return (ResElts * 2) == SrcElts && Index == ResElts;
2622}
2623
2625 CallingConv::ID CC,
2626 EVT VT) const {
2627 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2628 // We might still end up using a GPR but that will be decided based on ABI.
2629 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2630 !Subtarget.hasStdExtZfhminOrZhinxmin())
2631 return MVT::f32;
2632
2633 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2634}
2635
2636unsigned
2638 std::optional<MVT> RegisterVT) const {
2639 // Pair inline assembly operand
2640 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2641 *RegisterVT == MVT::Untyped)
2642 return 1;
2643
2644 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2645}
2646
2648 CallingConv::ID CC,
2649 EVT VT) const {
2650 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2651 // We might still end up using a GPR but that will be decided based on ABI.
2652 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2653 !Subtarget.hasStdExtZfhminOrZhinxmin())
2654 return 1;
2655
2656 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2657}
2658
2659// Changes the condition code and swaps operands if necessary, so the SetCC
2660// operation matches one of the comparisons supported directly by branches
2661// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2662// with 1/-1.
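// For example, (X > -1) becomes (X >= 0), (X < 1) becomes (0 >= X), and an
// unhandled (X > Y) is rewritten with swapped operands so it maps onto the
// BLT/BGE-style branches the ISA provides.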
2664 ISD::CondCode &CC, SelectionDAG &DAG,
2665 const RISCVSubtarget &Subtarget) {
2666 // If this is a single bit test that can't be handled by ANDI, shift the
2667 // bit to be tested to the MSB and perform a signed compare with 0.
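// For example, on RV64 the test (X & 0x1000) == 0 becomes the signed compare
// (X << 51) >= 0, since shifting left by 63 - 12 moves bit 12 into the sign
// bit.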
2668 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2669 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2670 isa<ConstantSDNode>(LHS.getOperand(1)) &&
2671 // XAndesPerf supports branch on test bit.
2672 !Subtarget.hasVendorXAndesPerf()) {
2673 uint64_t Mask = LHS.getConstantOperandVal(1);
2674 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2675 unsigned ShAmt = 0;
2676 if (isPowerOf2_64(Mask)) {
2677 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2678 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2679 } else {
2680 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2681 }
2682
2683 LHS = LHS.getOperand(0);
2684 if (ShAmt != 0)
2685 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2686 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2687 return;
2688 }
2689 }
2690
2691 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2692 int64_t C = RHSC->getSExtValue();
2693 switch (CC) {
2694 default: break;
2695 case ISD::SETGT:
2696 // Convert X > -1 to X >= 0.
2697 if (C == -1) {
2698 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2699 CC = ISD::SETGE;
2700 return;
2701 }
2702 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2703 C != INT64_MAX && isInt<5>(C + 1)) {
2704 // We have a conditional move instruction for SETGE but not SETGT.
2705 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit signed immediate.
2706 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2707 CC = ISD::SETGE;
2708 return;
2709 }
2710 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isInt<16>(C + 1)) {
2711 // We have a branch immediate instruction for SETGE but not SETGT.
2712 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit signed immediate.
2713 RHS = DAG.getSignedConstant(C + 1, DL, RHS.getValueType());
2714 CC = ISD::SETGE;
2715 return;
2716 }
2717 break;
2718 case ISD::SETLT:
2719 // Convert X < 1 to 0 >= X.
2720 if (C == 1) {
2721 RHS = LHS;
2722 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2723 CC = ISD::SETGE;
2724 return;
2725 }
2726 break;
2727 case ISD::SETUGT:
2728 if ((Subtarget.hasVendorXqcicm() || Subtarget.hasVendorXqcicli()) &&
2729 C != INT64_MAX && isUInt<5>(C + 1)) {
2730 // We have a conditional move instruction for SETUGE but not SETUGT.
2731 // Convert X > C to X >= C + 1, if (C + 1) is a 5-bit unsigned immediate.
2732 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2733 CC = ISD::SETUGE;
2734 return;
2735 }
2736 if (Subtarget.hasVendorXqcibi() && C != INT64_MAX && isUInt<16>(C + 1)) {
2737 // We have a branch immediate instruction for SETUGE but not SETUGT.
2738 // Convert X > C to X >= C + 1, if (C + 1) is a 16-bit unsigned
2739 // immediate.
2740 RHS = DAG.getConstant(C + 1, DL, RHS.getValueType());
2741 CC = ISD::SETUGE;
2742 return;
2743 }
2744 break;
2745 }
2746 }
2747
2748 switch (CC) {
2749 default:
2750 break;
2751 case ISD::SETGT:
2752 case ISD::SETLE:
2753 case ISD::SETUGT:
2754 case ISD::SETULE:
2756 std::swap(LHS, RHS);
2757 break;
2758 }
2759}
2760
2762 if (VT.isRISCVVectorTuple()) {
2763 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2764 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2765 return RISCVVType::LMUL_F8;
2766 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2767 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2768 return RISCVVType::LMUL_F4;
2769 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2770 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2771 return RISCVVType::LMUL_F2;
2772 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2773 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2774 return RISCVVType::LMUL_1;
2775 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2776 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2777 return RISCVVType::LMUL_2;
2778 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2779 return RISCVVType::LMUL_4;
2780 llvm_unreachable("Invalid vector tuple type LMUL.");
2781 }
2782
2783 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2784 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2785 if (VT.getVectorElementType() == MVT::i1)
2786 KnownSize *= 8;
2787
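// The LMUL follows from the ratio of the known minimum size to
// RVVBitsPerBlock (64). For example, nxv1i64 and nxv2i32 (64 bits) map to
// LMUL_1, nxv8i32 (256 bits) maps to LMUL_4, and an nxv8i1 mask is scaled by
// 8 before the lookup.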
2788 switch (KnownSize) {
2789 default:
2790 llvm_unreachable("Invalid LMUL.");
2791 case 8:
2792 return RISCVVType::LMUL_F8;
2793 case 16:
2794 return RISCVVType::LMUL_F4;
2795 case 32:
2796 return RISCVVType::LMUL_F2;
2797 case 64:
2798 return RISCVVType::LMUL_1;
2799 case 128:
2800 return RISCVVType::LMUL_2;
2801 case 256:
2802 return RISCVVType::LMUL_4;
2803 case 512:
2804 return RISCVVType::LMUL_8;
2805 }
2806}
2807
2809 switch (LMul) {
2810 default:
2811 llvm_unreachable("Invalid LMUL.");
2815 case RISCVVType::LMUL_1:
2816 return RISCV::VRRegClassID;
2817 case RISCVVType::LMUL_2:
2818 return RISCV::VRM2RegClassID;
2819 case RISCVVType::LMUL_4:
2820 return RISCV::VRM4RegClassID;
2821 case RISCVVType::LMUL_8:
2822 return RISCV::VRM8RegClassID;
2823 }
2824}
2825
2826unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2827 RISCVVType::VLMUL LMUL = getLMUL(VT);
2828 if (LMUL == RISCVVType::LMUL_F8 || LMUL == RISCVVType::LMUL_F4 ||
2829 LMUL == RISCVVType::LMUL_F2 || LMUL == RISCVVType::LMUL_1) {
2830 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2831 "Unexpected subreg numbering");
2832 return RISCV::sub_vrm1_0 + Index;
2833 }
2834 if (LMUL == RISCVVType::LMUL_2) {
2835 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2836 "Unexpected subreg numbering");
2837 return RISCV::sub_vrm2_0 + Index;
2838 }
2839 if (LMUL == RISCVVType::LMUL_4) {
2840 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2841 "Unexpected subreg numbering");
2842 return RISCV::sub_vrm4_0 + Index;
2843 }
2844 llvm_unreachable("Invalid vector type.");
2845}
2846
2848 if (VT.isRISCVVectorTuple()) {
2849 unsigned NF = VT.getRISCVVectorTupleNumFields();
2850 unsigned RegsPerField =
2851 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2852 (NF * RISCV::RVVBitsPerBlock));
2853 switch (RegsPerField) {
2854 case 1:
2855 if (NF == 2)
2856 return RISCV::VRN2M1RegClassID;
2857 if (NF == 3)
2858 return RISCV::VRN3M1RegClassID;
2859 if (NF == 4)
2860 return RISCV::VRN4M1RegClassID;
2861 if (NF == 5)
2862 return RISCV::VRN5M1RegClassID;
2863 if (NF == 6)
2864 return RISCV::VRN6M1RegClassID;
2865 if (NF == 7)
2866 return RISCV::VRN7M1RegClassID;
2867 if (NF == 8)
2868 return RISCV::VRN8M1RegClassID;
2869 break;
2870 case 2:
2871 if (NF == 2)
2872 return RISCV::VRN2M2RegClassID;
2873 if (NF == 3)
2874 return RISCV::VRN3M2RegClassID;
2875 if (NF == 4)
2876 return RISCV::VRN4M2RegClassID;
2877 break;
2878 case 4:
2879 assert(NF == 2);
2880 return RISCV::VRN2M4RegClassID;
2881 default:
2882 break;
2883 }
2884 llvm_unreachable("Invalid vector tuple type RegClass.");
2885 }
2886
2887 if (VT.getVectorElementType() == MVT::i1)
2888 return RISCV::VRRegClassID;
2889 return getRegClassIDForLMUL(getLMUL(VT));
2890}
2891
2892// Attempt to decompose a subvector insert/extract between VecVT and
2893// SubVecVT via subregister indices. Returns the subregister index that
2894// can perform the subvector insert/extract with the given element index, as
2895// well as the index corresponding to any leftover subvectors that must be
2896// further inserted/extracted within the register class for SubVecVT.
2897std::pair<unsigned, unsigned>
2899 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2900 const RISCVRegisterInfo *TRI) {
2901 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2902 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2903 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2904 "Register classes not ordered");
2905 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2906 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2907
2908 // If VecVT is a vector tuple type, either it has the same RegClass as
2909 // SubVecVT or SubVecVT is actually a subvector of VecVT.
2910 if (VecVT.isRISCVVectorTuple()) {
2911 if (VecRegClassID == SubRegClassID)
2912 return {RISCV::NoSubRegister, 0};
2913
2914 assert(SubVecVT.isScalableVector() &&
2915 "Only allow scalable vector subvector.");
2916 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2917 "Invalid vector tuple insert/extract for vector and subvector with "
2918 "different LMUL.");
2919 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2920 }
2921
2922 // Try to compose a subregister index that takes us from the incoming
2923 // LMUL>1 register class down to the outgoing one. At each step we halve
2924 // the LMUL:
2925 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2926 // Note that this is not guaranteed to find a subregister index, such as
2927 // when we are extracting from one VR type to another.
2928 unsigned SubRegIdx = RISCV::NoSubRegister;
2929 for (const unsigned RCID :
2930 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2931 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2932 VecVT = VecVT.getHalfNumVectorElementsVT();
2933 bool IsHi =
2934 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2935 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2936 getSubregIndexByMVT(VecVT, IsHi));
2937 if (IsHi)
2938 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2939 }
2940 return {SubRegIdx, InsertExtractIdx};
2941}
2942
2943// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2944// stores for those types.
2945bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2946 return !Subtarget.useRVVForFixedLengthVectors() ||
2947 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2948}
2949
2951 if (!ScalarTy.isSimple())
2952 return false;
2953 switch (ScalarTy.getSimpleVT().SimpleTy) {
2954 case MVT::iPTR:
2955 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2956 case MVT::i8:
2957 case MVT::i16:
2958 case MVT::i32:
2959 return Subtarget.hasVInstructions();
2960 case MVT::i64:
2961 return Subtarget.hasVInstructionsI64();
2962 case MVT::f16:
2963 return Subtarget.hasVInstructionsF16Minimal();
2964 case MVT::bf16:
2965 return Subtarget.hasVInstructionsBF16Minimal();
2966 case MVT::f32:
2967 return Subtarget.hasVInstructionsF32();
2968 case MVT::f64:
2969 return Subtarget.hasVInstructionsF64();
2970 default:
2971 return false;
2972 }
2973}
2974
2975
2977 return NumRepeatedDivisors;
2978}
2979
2981 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2982 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2983 "Unexpected opcode");
2984 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2985 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2987 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2988 if (!II)
2989 return SDValue();
2990 return Op.getOperand(II->VLOperand + 1 + HasChain);
2991}
2992
2994 const RISCVSubtarget &Subtarget) {
2995 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2996 if (!Subtarget.useRVVForFixedLengthVectors())
2997 return false;
2998
2999 // We only support a set of vector types with a consistent maximum fixed size
3000 // across all supported vector element types to avoid legalization issues.
3001 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
3002 // fixed-length vector type we support is 1024 bytes.
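// For example, a fixed vector of more than 8192 bits (1024 bytes) is rejected
// here, and with a minimum VLEN of 128 a mask wider than v128i1 is rejected
// below because masks can only occupy a single vector register.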
3003 if (VT.getVectorNumElements() > 1024 || VT.getFixedSizeInBits() > 1024 * 8)
3004 return false;
3005
3006 unsigned MinVLen = Subtarget.getRealMinVLen();
3007
3008 MVT EltVT = VT.getVectorElementType();
3009
3010 // Don't use RVV for vectors we cannot scalarize if required.
3011 switch (EltVT.SimpleTy) {
3012 // i1 is supported but has different rules.
3013 default:
3014 return false;
3015 case MVT::i1:
3016 // Masks can only use a single register.
3017 if (VT.getVectorNumElements() > MinVLen)
3018 return false;
3019 MinVLen /= 8;
3020 break;
3021 case MVT::i8:
3022 case MVT::i16:
3023 case MVT::i32:
3024 break;
3025 case MVT::i64:
3026 if (!Subtarget.hasVInstructionsI64())
3027 return false;
3028 break;
3029 case MVT::f16:
3030 if (!Subtarget.hasVInstructionsF16Minimal())
3031 return false;
3032 break;
3033 case MVT::bf16:
3034 if (!Subtarget.hasVInstructionsBF16Minimal())
3035 return false;
3036 break;
3037 case MVT::f32:
3038 if (!Subtarget.hasVInstructionsF32())
3039 return false;
3040 break;
3041 case MVT::f64:
3042 if (!Subtarget.hasVInstructionsF64())
3043 return false;
3044 break;
3045 }
3046
3047 // Reject elements larger than ELEN.
3048 if (EltVT.getSizeInBits() > Subtarget.getELen())
3049 return false;
3050
3051 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
3052 // Don't use RVV for types that don't fit.
3053 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
3054 return false;
3055
3056 // TODO: Perhaps an artificial restriction, but worth having whilst getting
3057 // the base fixed length RVV support in place.
3058 if (!VT.isPow2VectorType())
3059 return false;
3060
3061 return true;
3062}
3063
3064bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
3065 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
3066}
3067
3068// Return the largest legal scalable vector type that matches VT's element type.
3070 const RISCVSubtarget &Subtarget) {
3071 // This may be called before legal types are setup.
3072 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
3073 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
3074 "Expected legal fixed length vector!");
3075
3076 unsigned MinVLen = Subtarget.getRealMinVLen();
3077 unsigned MaxELen = Subtarget.getELen();
3078
3079 MVT EltVT = VT.getVectorElementType();
3080 switch (EltVT.SimpleTy) {
3081 default:
3082 llvm_unreachable("unexpected element type for RVV container");
3083 case MVT::i1:
3084 case MVT::i8:
3085 case MVT::i16:
3086 case MVT::i32:
3087 case MVT::i64:
3088 case MVT::bf16:
3089 case MVT::f16:
3090 case MVT::f32:
3091 case MVT::f64: {
3092 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
3093 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
3094 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
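// For example, with a minimum VLEN of 128 and ELEN=64, v4i32 (128 bits) maps
// to nxv2i32 (a full register) and v2i32 maps to nxv1i32 (LMUL=1/2).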
3095 unsigned NumElts =
3097 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
3098 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
3099 return MVT::getScalableVectorVT(EltVT, NumElts);
3100 }
3101 }
3102}
3103
3105 const RISCVSubtarget &Subtarget) {
3107 Subtarget);
3108}
3109
3111 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
3112}
3113
3114// Grow V to consume an entire RVV register.
3116 const RISCVSubtarget &Subtarget) {
3117 assert(VT.isScalableVector() &&
3118 "Expected to convert into a scalable vector!");
3119 assert(V.getValueType().isFixedLengthVector() &&
3120 "Expected a fixed length vector operand!");
3121 SDLoc DL(V);
3122 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
3123}
3124
3125// Shrink V so it's just big enough to maintain a VT's worth of data.
3127 const RISCVSubtarget &Subtarget) {
3129 "Expected to convert into a fixed length vector!");
3130 assert(V.getValueType().isScalableVector() &&
3131 "Expected a scalable vector operand!");
3132 SDLoc DL(V);
3133 return DAG.getExtractSubvector(DL, VT, V, 0);
3134}
3135
3136/// Return the mask type suitable for masking the provided vector type.
3137/// This is simply an i1 element type vector of the same (possibly scalable)
3138/// length.
3139static MVT getMaskTypeFor(MVT VecVT) {
3140 assert(VecVT.isVector());
3142 return MVT::getVectorVT(MVT::i1, EC);
3143}
3144
3145/// Creates an all-ones mask suitable for masking a vector of type VecVT
3146/// with vector length VL.
3147static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
3148 SelectionDAG &DAG) {
3149 MVT MaskVT = getMaskTypeFor(VecVT);
3150 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3151}
3152
3153static std::pair<SDValue, SDValue>
3155 const RISCVSubtarget &Subtarget) {
3156 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
3157 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
3158 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
3159 return {Mask, VL};
3160}
3161
3162static std::pair<SDValue, SDValue>
3163getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
3164 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
3165 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
3166 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
3167 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
3168 return {Mask, VL};
3169}
3170
3171// Gets the two common "VL" operands: an all-ones mask and the vector length.
3172// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
3173// the vector type that the fixed-length vector is contained in. Otherwise if
3174// VecVT is scalable, then ContainerVT should be the same as VecVT.
3175static std::pair<SDValue, SDValue>
3176getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
3177 const RISCVSubtarget &Subtarget) {
3178 if (VecVT.isFixedLengthVector())
3179 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
3180 Subtarget);
3181 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
3182 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
3183}
3184
3186 SelectionDAG &DAG) const {
3187 assert(VecVT.isScalableVector() && "Expected scalable vector");
3188 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
3189 VecVT.getVectorElementCount());
3190}
3191
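// Returns the smallest and largest possible value of VLMAX for VecVT over the
// subtarget's VLEN range. For example, for nxv2i32 with VLEN between 128 and
// 512 this returns {4, 16}.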
3192std::pair<unsigned, unsigned>
3194 const RISCVSubtarget &Subtarget) {
3195 assert(VecVT.isScalableVector() && "Expected scalable vector");
3196
3197 unsigned EltSize = VecVT.getScalarSizeInBits();
3198 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
3199
3200 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
3201 unsigned MaxVLMAX =
3202 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
3203
3204 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
3205 unsigned MinVLMAX =
3206 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
3207
3208 return std::make_pair(MinVLMAX, MaxVLMAX);
3209}
3210
3211// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
3212// little of either is (currently) supported.
3213// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
3214// as a ..., etc.
3215// Until either (or both) of these can reliably lower any node, reporting that
3216// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
3217// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
3218// which is not desirable.
3220 EVT VT, unsigned DefinedValues) const {
3221 return false;
3222}
3223
3225 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
3226 // implementation-defined.
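// For example, with DLEN == VLEN (DLenFactor = 1) an LMUL=4 type costs 4, a
// fractional LMUL=1/2 type costs 1, and a 256-bit fixed-length vector on a
// VLEN=128 target costs 2.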
3227 if (!VT.isVector())
3229 unsigned DLenFactor = Subtarget.getDLenFactor();
3230 unsigned Cost;
3231 if (VT.isScalableVector()) {
3232 unsigned LMul;
3233 bool Fractional;
3234 std::tie(LMul, Fractional) =
3236 if (Fractional)
3237 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
3238 else
3239 Cost = (LMul * DLenFactor);
3240 } else {
3241 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
3242 }
3243 return Cost;
3244}
3245
3246
3247/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
3248/// may be quadratic in the number of vregs implied by LMUL, and is assumed to
3249/// be so by default. VRGatherCostModel reflects the available options. Note that
3250/// the operands (index and possibly mask) are handled separately.
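// For example, with DLEN == VLEN an LMUL=4 gather costs 4 * 4 = 16 under the
// quadratic model and 4 * log2(4) = 8 under the N*log2(N) model.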
3252 auto LMULCost = getLMULCost(VT);
3253 bool Log2CostModel =
3254 Subtarget.getVRGatherCostModel() == llvm::RISCVSubtarget::NLog2N;
3255 if (Log2CostModel && LMULCost.isValid()) {
3256 unsigned Log = Log2_64(LMULCost.getValue());
3257 if (Log > 0)
3258 return LMULCost * Log;
3259 }
3260 return LMULCost * LMULCost;
3261}
3262
3263/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
3264/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
3265/// or may track the vrgather.vv cost. It is implementation-dependent.
3269
3270/// Return the cost of a vslidedown.vx or vslideup.vx instruction
3271/// for the type VT. (This does not cover the vslide1up or vslide1down
3272/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3273/// or may track the vrgather.vv cost. It is implementation-dependent.
3277
3278/// Return the cost of a vslidedown.vi or vslideup.vi instruction
3279/// for the type VT. (This does not cover the vslide1up or vslide1down
3280/// variants.) Slides may be linear in the number of vregs implied by LMUL,
3281/// or may track the vrgather.vv cost. It is implementation-dependent.
3285
3287 const RISCVSubtarget &Subtarget) {
3288 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
3289 // bf16 conversions are always promoted to f32.
3290 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3291 Op.getValueType() == MVT::bf16) {
3292 bool IsStrict = Op->isStrictFPOpcode();
3293
3294 SDLoc DL(Op);
3295 if (IsStrict) {
3296 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
3297 {Op.getOperand(0), Op.getOperand(1)});
3298 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
3299 {Op.getValueType(), MVT::Other},
3300 {Val.getValue(1), Val.getValue(0),
3301 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
3302 }
3303 return DAG.getNode(
3304 ISD::FP_ROUND, DL, Op.getValueType(),
3305 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
3306 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
3307 }
3308
3309 // Other operations are legal.
3310 return Op;
3311}
3312
3314 const RISCVSubtarget &Subtarget) {
3315 // RISC-V FP-to-int conversions saturate to the destination register size, but
3316 // don't produce 0 for nan. We can use a conversion instruction and fix the
3317 // nan case with a compare and a select.
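// For a nan input Src != Src, so comparing Src with itself lets us select 0
// instead of the converted value; ordered inputs keep the (already
// saturating) fcvt/vfcvt result.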
3318 SDValue Src = Op.getOperand(0);
3319
3320 MVT DstVT = Op.getSimpleValueType();
3321 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3322
3323 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
3324
3325 if (!DstVT.isVector()) {
3326 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
3327 // the result.
3328 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3329 Src.getValueType() == MVT::bf16) {
3330 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
3331 }
3332
3333 unsigned Opc;
3334 if (SatVT == DstVT)
3335 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
3336 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
3337 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
3338 else
3339 return SDValue();
3340 // FIXME: Support other SatVTs by clamping before or after the conversion.
3341
3342 SDLoc DL(Op);
3343 SDValue FpToInt = DAG.getNode(
3344 Opc, DL, DstVT, Src,
3346
3347 if (Opc == RISCVISD::FCVT_WU_RV64)
3348 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
3349
3350 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
3351 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
3353 }
3354
3355 // Vectors.
3356
3357 MVT DstEltVT = DstVT.getVectorElementType();
3358 MVT SrcVT = Src.getSimpleValueType();
3359 MVT SrcEltVT = SrcVT.getVectorElementType();
3360 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3361 unsigned DstEltSize = DstEltVT.getSizeInBits();
3362
3363 // Only handle saturating to the destination type.
3364 if (SatVT != DstEltVT)
3365 return SDValue();
3366
3367 MVT DstContainerVT = DstVT;
3368 MVT SrcContainerVT = SrcVT;
3369 if (DstVT.isFixedLengthVector()) {
3370 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3371 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3372 assert(DstContainerVT.getVectorElementCount() ==
3373 SrcContainerVT.getVectorElementCount() &&
3374 "Expected same element count");
3375 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3376 }
3377
3378 SDLoc DL(Op);
3379
3380 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
3381
3382 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
3383 {Src, Src, DAG.getCondCode(ISD::SETNE),
3384 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
3385
3386 // Need to widen by more than 1 step, promote the FP type, then do a widening
3387 // convert.
3388 if (DstEltSize > (2 * SrcEltSize)) {
3389 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
3390 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
3391 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
3392 }
3393
3394 MVT CvtContainerVT = DstContainerVT;
3395 MVT CvtEltVT = DstEltVT;
3396 if (SrcEltSize > (2 * DstEltSize)) {
3397 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3398 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3399 }
3400
3401 unsigned RVVOpc =
3402 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
3403 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
3404
3405 while (CvtContainerVT != DstContainerVT) {
3406 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
3407 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
3408 // Rounding mode here is arbitrary since we aren't shifting out any bits.
3409 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3410 : RISCVISD::TRUNCATE_VECTOR_VL_USAT;
3411 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3412 }
3413
3414 SDValue SplatZero = DAG.getNode(
3415 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3416 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3417 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3418 Res, DAG.getUNDEF(DstContainerVT), VL);
3419
3420 if (DstVT.isFixedLengthVector())
3421 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3422
3423 return Res;
3424}
3425
3427 const RISCVSubtarget &Subtarget) {
3428 bool IsStrict = Op->isStrictFPOpcode();
3429 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3430
3431 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3432 // bf16 conversions are always promoted to f32.
3433 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3434 SrcVal.getValueType() == MVT::bf16) {
3435 SDLoc DL(Op);
3436 if (IsStrict) {
3437 SDValue Ext =
3438 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3439 {Op.getOperand(0), SrcVal});
3440 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3441 {Ext.getValue(1), Ext.getValue(0)});
3442 }
3443 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3444 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3445 }
3446
3447 // Other operations are legal.
3448 return Op;
3449}
3450
3452 switch (Opc) {
3453 case ISD::FROUNDEVEN:
3455 case ISD::VP_FROUNDEVEN:
3456 return RISCVFPRndMode::RNE;
3457 case ISD::FTRUNC:
3458 case ISD::STRICT_FTRUNC:
3459 case ISD::VP_FROUNDTOZERO:
3460 return RISCVFPRndMode::RTZ;
3461 case ISD::FFLOOR:
3462 case ISD::STRICT_FFLOOR:
3463 case ISD::VP_FFLOOR:
3464 return RISCVFPRndMode::RDN;
3465 case ISD::FCEIL:
3466 case ISD::STRICT_FCEIL:
3467 case ISD::VP_FCEIL:
3468 return RISCVFPRndMode::RUP;
3469 case ISD::FROUND:
3470 case ISD::LROUND:
3471 case ISD::LLROUND:
3472 case ISD::STRICT_FROUND:
3473 case ISD::STRICT_LROUND:
3475 case ISD::VP_FROUND:
3476 return RISCVFPRndMode::RMM;
3477 case ISD::FRINT:
3478 case ISD::LRINT:
3479 case ISD::LLRINT:
3480 case ISD::STRICT_FRINT:
3481 case ISD::STRICT_LRINT:
3482 case ISD::STRICT_LLRINT:
3483 case ISD::VP_FRINT:
3484 case ISD::VP_LRINT:
3485 case ISD::VP_LLRINT:
3486 return RISCVFPRndMode::DYN;
3487 }
3488
3490}
3491
3492// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3493// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3494// the integer domain and back, taking care to avoid converting values that
3495// are nan or already correct.
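// For example, a vector FFLOOR computes a mask of lanes with |x| < 2^23 (for
// f32; nan lanes are excluded by the ordered compare), converts those lanes
// to integer with RDN rounding and back to FP, and finally restores the
// original sign so results such as -0.0 are preserved.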
3496static SDValue
3498 const RISCVSubtarget &Subtarget) {
3499 MVT VT = Op.getSimpleValueType();
3500 assert(VT.isVector() && "Unexpected type");
3501
3502 SDLoc DL(Op);
3503
3504 SDValue Src = Op.getOperand(0);
3505
3506 // Freeze the source since we are increasing the number of uses.
3507 Src = DAG.getFreeze(Src);
3508
3509 MVT ContainerVT = VT;
3510 if (VT.isFixedLengthVector()) {
3511 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3512 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3513 }
3514
3515 SDValue Mask, VL;
3516 if (Op->isVPOpcode()) {
3517 Mask = Op.getOperand(1);
3518 if (VT.isFixedLengthVector())
3519 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3520 Subtarget);
3521 VL = Op.getOperand(2);
3522 } else {
3523 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3524 }
3525
3526 // We do the conversion on the absolute value and fix the sign at the end.
3527 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3528
3529 // Determine the largest integer that can be represented exactly. This and
3530 // values larger than it don't have any fractional bits so don't need to
3531 // be converted.
3532 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3533 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3534 APFloat MaxVal = APFloat(FltSem);
3535 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3536 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3537 SDValue MaxValNode =
3538 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3539 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3540 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3541
3542 // If abs(Src) was larger than MaxVal or nan, keep it.
3543 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3544 Mask =
3545 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3546 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3547 Mask, Mask, VL});
3548
3549 // Truncate to integer and convert back to FP.
3550 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3551 MVT XLenVT = Subtarget.getXLenVT();
3552 SDValue Truncated;
3553
3554 switch (Op.getOpcode()) {
3555 default:
3556 llvm_unreachable("Unexpected opcode");
3557 case ISD::FRINT:
3558 case ISD::VP_FRINT:
3559 case ISD::FCEIL:
3560 case ISD::VP_FCEIL:
3561 case ISD::FFLOOR:
3562 case ISD::VP_FFLOOR:
3563 case ISD::FROUND:
3564 case ISD::FROUNDEVEN:
3565 case ISD::VP_FROUND:
3566 case ISD::VP_FROUNDEVEN:
3567 case ISD::VP_FROUNDTOZERO: {
3570 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3571 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3572 break;
3573 }
3574 case ISD::FTRUNC:
3575 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3576 Mask, VL);
3577 break;
3578 case ISD::FNEARBYINT:
3579 case ISD::VP_FNEARBYINT:
3580 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3581 Mask, VL);
3582 break;
3583 }
3584
3585 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3586 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3587 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3588 Mask, VL);
3589
3590 // Restore the original sign so that -0.0 is preserved.
3591 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3592 Src, Src, Mask, VL);
3593
3594 if (!VT.isFixedLengthVector())
3595 return Truncated;
3596
3597 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3598}
3599
3600// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3601// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by first converting any sNaN in the
3602// source to qNaN, then converting the new source to integer and back to FP.
3603static SDValue
3605 const RISCVSubtarget &Subtarget) {
3606 SDLoc DL(Op);
3607 MVT VT = Op.getSimpleValueType();
3608 SDValue Chain = Op.getOperand(0);
3609 SDValue Src = Op.getOperand(1);
3610
3611 MVT ContainerVT = VT;
3612 if (VT.isFixedLengthVector()) {
3613 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3614 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3615 }
3616
3617 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3618
3619 // Freeze the source since we are increasing the number of uses.
3620 Src = DAG.getFreeze(Src);
3621
3622 // Convert sNan to qNan by executing x + x for all unordered element x in Src.
3623 MVT MaskVT = Mask.getSimpleValueType();
3624 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3625 DAG.getVTList(MaskVT, MVT::Other),
3626 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3627 DAG.getUNDEF(MaskVT), Mask, VL});
3628 Chain = Unorder.getValue(1);
3629 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3630 DAG.getVTList(ContainerVT, MVT::Other),
3631 {Chain, Src, Src, Src, Unorder, VL});
3632 Chain = Src.getValue(1);
3633
3634 // We do the conversion on the absolute value and fix the sign at the end.
3635 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3636
3637 // Determine the largest integer that can be represented exactly. This and
3638 // values larger than it don't have any fractional bits so don't need to
3639 // be converted.
3640 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3641 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3642 APFloat MaxVal = APFloat(FltSem);
3643 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3644 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3645 SDValue MaxValNode =
3646 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3647 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3648 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3649
3650 // If abs(Src) was larger than MaxVal or nan, keep it.
3651 Mask = DAG.getNode(
3652 RISCVISD::SETCC_VL, DL, MaskVT,
3653 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3654
3655 // Truncate to integer and convert back to FP.
3656 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3657 MVT XLenVT = Subtarget.getXLenVT();
3658 SDValue Truncated;
3659
3660 switch (Op.getOpcode()) {
3661 default:
3662 llvm_unreachable("Unexpected opcode");
3663 case ISD::STRICT_FCEIL:
3664 case ISD::STRICT_FFLOOR:
3665 case ISD::STRICT_FROUND:
3669 Truncated = DAG.getNode(
3670 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3671 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3672 break;
3673 }
3674 case ISD::STRICT_FTRUNC:
3675 Truncated =
3676 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3677 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3678 break;
3680 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3681 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3682 Mask, VL);
3683 break;
3684 }
3685 Chain = Truncated.getValue(1);
3686
3687 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3688 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3689 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3690 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3691 Truncated, Mask, VL);
3692 Chain = Truncated.getValue(1);
3693 }
3694
3695 // Restore the original sign so that -0.0 is preserved.
3696 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3697 Src, Src, Mask, VL);
3698
3699 if (VT.isFixedLengthVector())
3700 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3701 return DAG.getMergeValues({Truncated, Chain}, DL);
3702}
3703
3704static SDValue
3706 const RISCVSubtarget &Subtarget) {
3707 MVT VT = Op.getSimpleValueType();
3708 if (VT.isVector())
3709 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3710
3711 if (DAG.shouldOptForSize())
3712 return SDValue();
3713
3714 SDLoc DL(Op);
3715 SDValue Src = Op.getOperand(0);
3716
3717 // Create an integer the size of the mantissa with the MSB set. This and all
3718 // values larger than it don't have any fractional bits so don't need to be
3719 // converted.
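// For example, the threshold is 2^23 for f32 and 2^52 for f64.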
3720 const fltSemantics &FltSem = VT.getFltSemantics();
3721 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3722 APFloat MaxVal = APFloat(FltSem);
3723 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3724 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3725 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3726
3728 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3729 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3730}
3731
3732// Expand vector [L]LRINT and [L]LROUND by converting to the integer domain.
3734 const RISCVSubtarget &Subtarget) {
3735 SDLoc DL(Op);
3736 MVT DstVT = Op.getSimpleValueType();
3737 SDValue Src = Op.getOperand(0);
3738 MVT SrcVT = Src.getSimpleValueType();
3739 assert(SrcVT.isVector() && DstVT.isVector() &&
3740 !(SrcVT.isFixedLengthVector() ^ DstVT.isFixedLengthVector()) &&
3741 "Unexpected type");
3742
3743 MVT DstContainerVT = DstVT;
3744 MVT SrcContainerVT = SrcVT;
3745
3746 if (DstVT.isFixedLengthVector()) {
3747 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
3748 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3749 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3750 }
3751
3752 auto [Mask, VL] = getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
3753
3754 // [b]f16 -> f32
3755 MVT SrcElemType = SrcVT.getVectorElementType();
3756 if (SrcElemType == MVT::f16 || SrcElemType == MVT::bf16) {
3757 MVT F32VT = SrcContainerVT.changeVectorElementType(MVT::f32);
3758 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, F32VT, Src, Mask, VL);
3759 }
3760
3761 SDValue Res =
3762 DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, DstContainerVT, Src, Mask,
3763 DAG.getTargetConstant(matchRoundingOp(Op.getOpcode()), DL,
3764 Subtarget.getXLenVT()),
3765 VL);
3766
3767 if (!DstVT.isFixedLengthVector())
3768 return Res;
3769
3770 return convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3771}
3772
3773static SDValue
3775 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3776 SDValue Offset, SDValue Mask, SDValue VL,
3778 if (Passthru.isUndef())
3780 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3781 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3782 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3783}
3784
3785static SDValue
3786getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3787 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3788 SDValue VL,
3790 if (Passthru.isUndef())
3792 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3793 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3794 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3795}
3796
3800 int64_t Addend;
3801};
3802
3803static std::optional<APInt> getExactInteger(const APFloat &APF,
3805 // We will use a SINT_TO_FP to materialize this constant so we should use a
3806 // signed APSInt here.
3807 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3808 // We use an arbitrary rounding mode here. If a floating-point value is an exact
3809 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3810 // the rounding mode changes the output value, then it is not an exact
3811 // integer.
3813 bool IsExact;
3814 // If it is out of signed integer range, it will return an invalid operation.
3815 // If it is not an exact integer, IsExact is false.
3816 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3818 !IsExact)
3819 return std::nullopt;
3820 return ValInt.extractBits(BitWidth, 0);
3821}
3822
3823// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3824// to the (non-zero) step S and start value X. This can be then lowered as the
3825// RVV sequence (VID * S) + X, for example.
3826// The step S is represented as an integer numerator divided by a positive
3827// denominator. Note that the implementation currently only identifies
3828// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3829// cannot detect 2/3, for example.
3830// Note that this method will also match potentially unappealing index
3831// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3832// determine whether this is worth generating code for.
3833//
3834// EltSizeInBits is the size of the type that the sequence will be calculated
3835// in, i.e. SEW for build_vectors or XLEN for address calculations.
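// For example, <i32 1, i32 3, i32 5, i32 7> is matched with step 2/1 and
// addend 1, while <i32 0, i32 0, i32 1, i32 1> is matched with step 1/2 and
// addend 0.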
3836static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3837 unsigned EltSizeInBits) {
3838 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3840 return std::nullopt;
3841 bool IsInteger = Op.getValueType().isInteger();
3842
3843 std::optional<unsigned> SeqStepDenom;
3844 std::optional<APInt> SeqStepNum;
3845 std::optional<APInt> SeqAddend;
3846 std::optional<std::pair<APInt, unsigned>> PrevElt;
3847 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3848
3849 // First extract the ops into a list of constant integer values. This may not
3850 // be possible for floats if they're not all representable as integers.
3851 SmallVector<std::optional<APInt>> Elts(Op.getNumOperands());
3852 const unsigned OpSize = Op.getScalarValueSizeInBits();
3853 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3854 if (Elt.isUndef()) {
3855 Elts[Idx] = std::nullopt;
3856 continue;
3857 }
3858 if (IsInteger) {
3859 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3860 } else {
3861 auto ExactInteger =
3862 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3863 if (!ExactInteger)
3864 return std::nullopt;
3865 Elts[Idx] = *ExactInteger;
3866 }
3867 }
3868
3869 for (auto [Idx, Elt] : enumerate(Elts)) {
3870 // Assume undef elements match the sequence; we just have to be careful
3871 // when interpolating across them.
3872 if (!Elt)
3873 continue;
3874
3875 if (PrevElt) {
3876 // Calculate the step since the last non-undef element, and ensure
3877 // it's consistent across the entire sequence.
3878 unsigned IdxDiff = Idx - PrevElt->second;
3879 APInt ValDiff = *Elt - PrevElt->first;
3880
3881 // A zero value difference means that we're somewhere in the middle
3882 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3883 // step change before evaluating the sequence.
3884 if (ValDiff == 0)
3885 continue;
3886
3887 int64_t Remainder = ValDiff.srem(IdxDiff);
3888 // Normalize the step if it's greater than 1.
3889 if (Remainder != ValDiff.getSExtValue()) {
3890 // The difference must cleanly divide the element span.
3891 if (Remainder != 0)
3892 return std::nullopt;
3893 ValDiff = ValDiff.sdiv(IdxDiff);
3894 IdxDiff = 1;
3895 }
3896
3897 if (!SeqStepNum)
3898 SeqStepNum = ValDiff;
3899 else if (ValDiff != SeqStepNum)
3900 return std::nullopt;
3901
3902 if (!SeqStepDenom)
3903 SeqStepDenom = IdxDiff;
3904 else if (IdxDiff != *SeqStepDenom)
3905 return std::nullopt;
3906 }
3907
3908 // Record this non-undef element for later.
3909 if (!PrevElt || PrevElt->first != *Elt)
3910 PrevElt = std::make_pair(*Elt, Idx);
3911 }
3912
3913 // We need to have logged a step for this to count as a legal index sequence.
3914 if (!SeqStepNum || !SeqStepDenom)
3915 return std::nullopt;
3916
3917 // Loop back through the sequence and validate elements we might have skipped
3918 // while waiting for a valid step. While doing this, log any sequence addend.
3919 for (auto [Idx, Elt] : enumerate(Elts)) {
3920 if (!Elt)
3921 continue;
3922 APInt ExpectedVal =
3923 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3924 *SeqStepNum)
3925 .sdiv(*SeqStepDenom);
3926
3927 APInt Addend = *Elt - ExpectedVal;
3928 if (!SeqAddend)
3929 SeqAddend = Addend;
3930 else if (Addend != SeqAddend)
3931 return std::nullopt;
3932 }
3933
3934 assert(SeqAddend && "Must have an addend if we have a step");
3935
3936 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3937 SeqAddend->getSExtValue()};
3938}
3939
3940// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3941// and lower it as a VRGATHER_VX_VL from the source vector.
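// For example, a splat of (extractelt %src, %idx) can be emitted as a single
// vrgather.vx of %src with scalar index %idx, avoiding moving the element out
// to a scalar register and splatting it back in.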
3942static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3943 SelectionDAG &DAG,
3944 const RISCVSubtarget &Subtarget) {
3945 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3946 return SDValue();
3947 SDValue Src = SplatVal.getOperand(0);
3948 // Don't perform this optimization for i1 vectors, or if the element types are
3949 // different
3950 // FIXME: Support i1 vectors, maybe by promoting to i8?
3951 MVT EltTy = VT.getVectorElementType();
3952 if (EltTy == MVT::i1 ||
3953 !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
3954 return SDValue();
3955 MVT SrcVT = Src.getSimpleValueType();
3956 if (EltTy != SrcVT.getVectorElementType())
3957 return SDValue();
3958 SDValue Idx = SplatVal.getOperand(1);
3959 // The index must be a legal type.
3960 if (Idx.getValueType() != Subtarget.getXLenVT())
3961 return SDValue();
3962
3963 // Check that we know Idx lies within VT
3964 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3965 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3966 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3967 return SDValue();
3968 }
3969
3970 // Convert fixed length vectors to scalable
3971 MVT ContainerVT = VT;
3972 if (VT.isFixedLengthVector())
3973 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3974
3975 MVT SrcContainerVT = SrcVT;
3976 if (SrcVT.isFixedLengthVector()) {
3977 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3978 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3979 }
3980
3981 // Put Vec in a VT sized vector
3982 if (SrcContainerVT.getVectorMinNumElements() <
3983 ContainerVT.getVectorMinNumElements())
3984 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Src, 0);
3985 else
3986 Src = DAG.getExtractSubvector(DL, ContainerVT, Src, 0);
3987
3988 // We checked that Idx fits inside VT earlier
3989 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3990 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3991 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3992 if (VT.isFixedLengthVector())
3993 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3994 return Gather;
3995}
3996
3997static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG,
3998 const RISCVSubtarget &Subtarget) {
3999 MVT VT = Op.getSimpleValueType();
4000 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4001
4002 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4003
4004 SDLoc DL(Op);
4005 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4006
4007 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
4008 int64_t StepNumerator = SimpleVID->StepNumerator;
4009 unsigned StepDenominator = SimpleVID->StepDenominator;
4010 int64_t Addend = SimpleVID->Addend;
4011
4012 assert(StepNumerator != 0 && "Invalid step");
4013 bool Negate = false;
4014 int64_t SplatStepVal = StepNumerator;
4015 unsigned StepOpcode = ISD::MUL;
4016 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
4017 // anyway as the shift of 63 won't fit in uimm5.
4018 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
4019 isPowerOf2_64(std::abs(StepNumerator))) {
4020 Negate = StepNumerator < 0;
4021 StepOpcode = ISD::SHL;
4022 SplatStepVal = Log2_64(std::abs(StepNumerator));
4023 }
4024
4025 // Only emit VIDs with suitably-small steps. We use imm5 as a threshold
4026 // since it's the immediate value many RVV instructions accept. There is
4027 // no vmul.vi instruction, so ensure the multiply constant can fit in a
4028 // single addi instruction. For the addend, we allow up to 32 bits.
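// For example, the v4i32 sequence <1, 3, 5, 7> (step 2, addend 1) is emitted
// roughly as: vid.v; vsll.vi by 1; vadd.vi by 1.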
4029 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
4030 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
4031 isPowerOf2_32(StepDenominator) &&
4032 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<32>(Addend)) {
4033 MVT VIDVT =
4034 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
4035 MVT VIDContainerVT =
4036 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
4037 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
4038 // Convert right out of the scalable type so we can use standard ISD
4039 // nodes for the rest of the computation. If we used scalable types with
4040 // these, we'd lose the fixed-length vector info and generate worse
4041 // vsetvli code.
4042 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
4043 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
4044 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
4045 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
4046 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
4047 }
4048 if (StepDenominator != 1) {
4049 SDValue SplatStep =
4050 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
4051 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
4052 }
4053 if (Addend != 0 || Negate) {
4054 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
4055 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
4056 VID);
4057 }
4058 if (VT.isFloatingPoint()) {
4059 // TODO: Use vfwcvt to reduce register pressure.
4060 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
4061 }
4062 return VID;
4063 }
4064 }
4065
4066 return SDValue();
4067}
4068
4069/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
4070/// which constitute a large proportion of the elements. In such cases we can
4071/// splat a vector with the dominant element and make up the shortfall with
4072/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
4073/// Note that this includes vectors of 2 elements by association. The
4074/// upper-most element is the "dominant" one, allowing us to use a splat to
4075/// "insert" the upper element, and an insert of the lower element at position
4076/// 0, which improves codegen.
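/// For example, <a, b, a, a> can be emitted as a splat of a plus a single
/// insert of b into lane 1, while <a, a, a, b> becomes a splat of a followed
/// by a v(f)slide1down of b into the last lane.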
4077static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
4078 const RISCVSubtarget &Subtarget) {
4079 MVT VT = Op.getSimpleValueType();
4080 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4081
4082 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4083
4084 SDLoc DL(Op);
4085 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4086
4087 MVT XLenVT = Subtarget.getXLenVT();
4088 unsigned NumElts = Op.getNumOperands();
4089
4090 SDValue DominantValue;
4091 unsigned MostCommonCount = 0;
4092 DenseMap<SDValue, unsigned> ValueCounts;
4093 unsigned NumUndefElts =
4094 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4095
4096 // Track the number of scalar loads we know we'd be inserting, estimated as
4097 // any non-zero floating-point constant. Other kinds of element are either
4098 // already in registers or are materialized on demand. The threshold at which
4099 // a vector load is more desirable than several scalar materialization and
4100 // vector-insertion instructions is not known.
4101 unsigned NumScalarLoads = 0;
4102
4103 for (SDValue V : Op->op_values()) {
4104 if (V.isUndef())
4105 continue;
4106
4107 unsigned &Count = ValueCounts[V];
4108 if (0 == Count)
4109 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
4110 NumScalarLoads += !CFP->isExactlyValue(+0.0);
4111
4112 // Is this value dominant? In case of a tie, prefer the highest element as
4113 // it's cheaper to insert near the beginning of a vector than it is at the
4114 // end.
4115 if (++Count >= MostCommonCount) {
4116 DominantValue = V;
4117 MostCommonCount = Count;
4118 }
4119 }
4120
4121 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
4122 unsigned NumDefElts = NumElts - NumUndefElts;
4123 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
4124
4125 // Don't perform this optimization when optimizing for size, since
4126 // materializing elements and inserting them tends to cause code bloat.
4127 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
4128 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
4129 ((MostCommonCount > DominantValueCountThreshold) ||
4130 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
4131 // Start by splatting the most common element.
4132 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
4133
4134 DenseSet<SDValue> Processed{DominantValue};
4135
4136 // We can handle an insert into the last element (of a splat) via
4137 // v(f)slide1down. This is slightly better than the vslideup insert
4138 // lowering as it avoids the need for a vector group temporary. It
4139 // is also better than using vmerge.vx as it avoids the need to
4140 // materialize the mask in a vector register.
4141 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
4142 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
4143 LastOp != DominantValue) {
4144 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4145 auto OpCode =
4146 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4147 if (!VT.isFloatingPoint())
4148 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
4149 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4150 LastOp, Mask, VL);
4151 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
4152 Processed.insert(LastOp);
4153 }
4154
4155 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
4156 for (const auto &OpIdx : enumerate(Op->ops())) {
4157 const SDValue &V = OpIdx.value();
4158 if (V.isUndef() || !Processed.insert(V).second)
4159 continue;
4160 if (ValueCounts[V] == 1) {
4161 Vec = DAG.getInsertVectorElt(DL, Vec, V, OpIdx.index());
4162 } else {
4163 // Blend in all instances of this value using a VSELECT, using a
4164 // mask where each bit signals whether that element is the one
4165 // we're after.
4166 SmallVector<SDValue> Ops;
4167 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
4168 return DAG.getConstant(V == V1, DL, XLenVT);
4169 });
4170 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
4171 DAG.getBuildVector(SelMaskTy, DL, Ops),
4172 DAG.getSplatBuildVector(VT, DL, V), Vec);
4173 }
4174 }
4175
4176 return Vec;
4177 }
4178
4179 return SDValue();
4180}
4181
4182static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
4183 const RISCVSubtarget &Subtarget) {
4184 MVT VT = Op.getSimpleValueType();
4185 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4186
4187 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4188
4189 SDLoc DL(Op);
4190 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4191
4192 MVT XLenVT = Subtarget.getXLenVT();
4193 unsigned NumElts = Op.getNumOperands();
4194
4195 if (VT.getVectorElementType() == MVT::i1) {
4196 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
4197 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
4198 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
4199 }
4200
4201 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
4202 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
4203 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
4204 }
4205
4206 // Lower constant mask BUILD_VECTORs via an integer vector type, in
4207 // scalar integer chunks whose bit-width depends on the number of mask
4208 // bits and XLEN.
4209 // First, determine the most appropriate scalar integer type to use. This
4210 // is at most XLenVT, but may be shrunk to a smaller vector element type
4211 // according to the size of the final vector - use i8 chunks rather than
4212 // XLenVT if we're producing a v8i1. This results in more consistent
4213 // codegen across RV32 and RV64.
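// For example, the v8i1 constant <1,0,1,1,0,0,1,0> packs into the single i8
// chunk 0x4D, which is built as a v1i8 vector and bitcast back to v8i1.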
4214 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
4215 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
4216 // If we have to use more than one INSERT_VECTOR_ELT then this
4217 // optimization is likely to increase code size; avoid performing it in
4218 // such a case. We can use a load from a constant pool in this case.
4219 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
4220 return SDValue();
4221 // Now we can create our integer vector type. Note that it may be larger
4222 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
4223 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
4224 MVT IntegerViaVecVT =
4225 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
4226 IntegerViaVecElts);
4227
4228 uint64_t Bits = 0;
4229 unsigned BitPos = 0, IntegerEltIdx = 0;
4230 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
4231
4232 for (unsigned I = 0; I < NumElts;) {
4233 SDValue V = Op.getOperand(I);
4234 bool BitValue = !V.isUndef() && V->getAsZExtVal();
4235 Bits |= ((uint64_t)BitValue << BitPos);
4236 ++BitPos;
4237 ++I;
4238
4239 // Once we accumulate enough bits to fill our scalar type or process the
4240 // last element, insert into our vector and clear our accumulated data.
4241 if (I % NumViaIntegerBits == 0 || I == NumElts) {
4242 if (NumViaIntegerBits <= 32)
4243 Bits = SignExtend64<32>(Bits);
4244 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
4245 Elts[IntegerEltIdx] = Elt;
4246 Bits = 0;
4247 BitPos = 0;
4248 IntegerEltIdx++;
4249 }
4250 }
4251
4252 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
4253
4254 if (NumElts < NumViaIntegerBits) {
4255 // If we're producing a smaller vector than our minimum legal integer
4256 // type, bitcast to the equivalent (known-legal) mask type, and extract
4257 // our final mask.
4258 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
4259 Vec = DAG.getBitcast(MVT::v8i1, Vec);
4260 Vec = DAG.getExtractSubvector(DL, VT, Vec, 0);
4261 } else {
4262 // Else we must have produced an integer type with the same size as the
4263 // mask type; bitcast for the final result.
4264 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
4265 Vec = DAG.getBitcast(VT, Vec);
4266 }
4267
4268 return Vec;
4269 }
4270
4271 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4272 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4273 : RISCVISD::VMV_V_X_VL;
4274 if (!VT.isFloatingPoint())
4275 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4276 Splat =
4277 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4278 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4279 }
4280
4281 // Try and match index sequences, which we can lower to the vid instruction
4282 // with optional modifications. An all-undef vector is matched by
4283 // getSplatValue, above.
4284 if (SDValue Res = lowerBuildVectorViaVID(Op, DAG, Subtarget))
4285 return Res;
4286
4287 // For very small build_vectors, use a single scalar insert of a constant.
4288 // TODO: Base this on constant rematerialization cost, not size.
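// For example, v4i8 <1, 2, 3, 4> amalgamates to the i32 constant 0x04030201,
// which is inserted into element 0 of an integer vector and bitcast to v4i8.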
4289 const unsigned EltBitSize = VT.getScalarSizeInBits();
4290 if (VT.getSizeInBits() <= 32 &&
4291 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
4292 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
4293 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
4294 "Unexpected sequence type");
4295 // If we can use the original VL with the modified element type, this
4296 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4297 // be moved into InsertVSETVLI?
4298 unsigned ViaVecLen =
4299 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
4300 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4301
4302 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4303 uint64_t SplatValue = 0;
4304 // Construct the amalgamated value at this larger vector type.
4305 for (const auto &OpIdx : enumerate(Op->op_values())) {
4306 const auto &SeqV = OpIdx.value();
4307 if (!SeqV.isUndef())
4308 SplatValue |=
4309 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
4310 }
4311
4312 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4313 // achieve better constant materialization.
4314 // On RV32, we need to sign-extend to use getSignedConstant.
4315 if (ViaIntVT == MVT::i32)
4316 SplatValue = SignExtend64<32>(SplatValue);
4317
4318 SDValue Vec = DAG.getInsertVectorElt(
4319 DL, DAG.getUNDEF(ViaVecVT),
4320 DAG.getSignedConstant(SplatValue, DL, XLenVT), 0);
4321 if (ViaVecLen != 1)
4322 Vec = DAG.getExtractSubvector(DL, MVT::getVectorVT(ViaIntVT, 1), Vec, 0);
4323 return DAG.getBitcast(VT, Vec);
4324 }
4325
4326
4327 // Attempt to detect "hidden" splats, which only reveal themselves as splats
4328 // when re-interpreted as a vector with a larger element type. For example,
4329 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
4330 // could be instead splat as
4331 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
4332 // TODO: This optimization could also work on non-constant splats, but it
4333 // would require bit-manipulation instructions to construct the splat value.
4334 SmallVector<SDValue> Sequence;
4335 const auto *BV = cast<BuildVectorSDNode>(Op);
4336 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
4337 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
4338 BV->getRepeatedSequence(Sequence) &&
4339 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
4340 unsigned SeqLen = Sequence.size();
4341 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
4342 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
4343 ViaIntVT == MVT::i64) &&
4344 "Unexpected sequence type");
4345
4346 // If we can use the original VL with the modified element type, this
4347 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
4348 // be moved into InsertVSETVLI?
4349 const unsigned RequiredVL = NumElts / SeqLen;
4350 const unsigned ViaVecLen =
4351 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
4352 NumElts : RequiredVL;
4353 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
4354
4355 unsigned EltIdx = 0;
4356 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
4357 uint64_t SplatValue = 0;
4358 // Construct the amalgamated value which can be splatted as this larger
4359 // vector type.
4360 for (const auto &SeqV : Sequence) {
4361 if (!SeqV.isUndef())
4362 SplatValue |=
4363 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
4364 EltIdx++;
4365 }
4366
4367 // On RV64, sign-extend from 32 to 64 bits where possible in order to
4368 // achieve better constant materialization.
4369 // On RV32, we need to sign-extend to use getSignedConstant.
4370 if (ViaIntVT == MVT::i32)
4371 SplatValue = SignExtend64<32>(SplatValue);
4372
4373 // Since we can't introduce illegal i64 types at this stage, we can only
4374 // perform an i64 splat on RV32 if it is its own sign-extended value. That
4375 // way we can use RVV instructions to splat.
4376 assert((ViaIntVT.bitsLE(XLenVT) ||
4377 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
4378 "Unexpected bitcast sequence");
4379 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
4380 SDValue ViaVL =
4381 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
4382 MVT ViaContainerVT =
4383 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
4384 SDValue Splat =
4385 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
4386 DAG.getUNDEF(ViaContainerVT),
4387 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
4388 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
4389 if (ViaVecLen != RequiredVL)
4390 Splat = DAG.getExtractSubvector(
4391 DL, MVT::getVectorVT(ViaIntVT, RequiredVL), Splat, 0);
4392 return DAG.getBitcast(VT, Splat);
4393 }
4394 }
4395
4396 // If the number of signbits allows, see if we can lower as a <N x i8>.
4397 // Our main goal here is to reduce LMUL (and thus work) required to
4398 // build the constant, but we will also narrow if the resulting
4399 // narrow vector is known to materialize cheaply.
4400 // TODO: We really should be costing the smaller vector. There are
4401 // profitable cases this misses.
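// For example, v4i16 <1, -2, 3, -4> has at most 3 significant bits per
// element, so it can be built as a v4i8 vector and widened with a vsext.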
4402 if (EltBitSize > 8 && VT.isInteger() &&
4403 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
4404 DAG.ComputeMaxSignificantBits(Op) <= 8) {
4405 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
4406 DL, Op->ops());
4407 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
4408 Source, DAG, Subtarget);
4409 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
4410 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4411 }
4412
4413 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4414 return Res;
4415
4416 // For constant vectors, use generic constant pool lowering. Otherwise,
4417 // we'd have to materialize constants in GPRs just to move them into the
4418 // vector.
4419 return SDValue();
4420}
4421
4422static unsigned getPACKOpcode(unsigned DestBW,
4423 const RISCVSubtarget &Subtarget) {
4424 switch (DestBW) {
4425 default:
4426 llvm_unreachable("Unsupported pack size");
4427 case 16:
4428 return RISCV::PACKH;
4429 case 32:
4430 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
4431 case 64:
4432 assert(Subtarget.is64Bit());
4433 return RISCV::PACK;
4434 }
4435}
4436
4437/// Double the element size of the build vector to reduce the number
4438/// of vslide1down in the build vector chain. In the worst case, this
4439/// trades three scalar operations for 1 vector operation. Scalar
4440/// operations are generally lower latency, and for out-of-order cores
4441/// we also benefit from additional parallelism.
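/// For example, a v8i8 build_vector with Zbkb packs adjacent element pairs
/// with packh into i16 scalars and continues as a v4i16 build_vector, halving
/// the number of vslide1down steps required.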
4442static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4443 const RISCVSubtarget &Subtarget) {
4444 SDLoc DL(Op);
4445 MVT VT = Op.getSimpleValueType();
4446 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4447 MVT ElemVT = VT.getVectorElementType();
4448 if (!ElemVT.isInteger())
4449 return SDValue();
4450
4451 // TODO: Relax these architectural restrictions, possibly with costing
4452 // of the actual instructions required.
4453 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4454 return SDValue();
4455
4456 unsigned NumElts = VT.getVectorNumElements();
4457 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4458 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4459 NumElts % 2 != 0)
4460 return SDValue();
4461
4462 // Produce [B,A] packed into a type twice as wide. Note that all
4463 // scalars are XLenVT, possibly masked (see below).
4464 MVT XLenVT = Subtarget.getXLenVT();
4465 SDValue Mask = DAG.getConstant(
4466 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4467 auto pack = [&](SDValue A, SDValue B) {
4468 // Bias the scheduling of the inserted operations to near the
4469 // definition of the element - this tends to reduce register
4470 // pressure overall.
4471 SDLoc ElemDL(B);
4472 if (Subtarget.hasStdExtZbkb())
4473 // Note that we're relying on the high bits of the result being
4474 // don't care. For PACKW, the result is *sign* extended.
4475 return SDValue(
4476 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4477 ElemDL, XLenVT, A, B),
4478 0);
4479
4480 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4481 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4482 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4483 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4484 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4485 SDNodeFlags::Disjoint);
4486 };
4487
4488 SmallVector<SDValue> NewOperands;
4489 NewOperands.reserve(NumElts / 2);
4490 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4491 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4492 assert(NumElts == NewOperands.size() * 2);
4493 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4494 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4495 return DAG.getNode(ISD::BITCAST, DL, VT,
4496 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4497}
4498
4499static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4500 const RISCVSubtarget &Subtarget) {
4501 MVT VT = Op.getSimpleValueType();
4502 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4503
4504 MVT EltVT = VT.getVectorElementType();
4505 MVT XLenVT = Subtarget.getXLenVT();
4506
4507 SDLoc DL(Op);
4508
4509 if (Subtarget.isRV32() && Subtarget.enablePExtCodeGen()) {
4510 if (VT != MVT::v4i8)
4511 return SDValue();
4512
4513 // <4 x i8> BUILD_VECTOR a, b, c, d -> PACK(PPACK.DH pair(a, b), pair(c, d))
4514 SDValue Val0 =
4515 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(0));
4516 SDValue Val1 =
4517 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(1));
4518 SDValue Val2 =
4519 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(2));
4520 SDValue Val3 =
4521 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i8, Op->getOperand(3));
4522 SDValue PackDH =
4523 DAG.getNode(RISCVISD::PPACK_DH, DL, {MVT::v2i16, MVT::v2i16},
4524 {Val0, Val1, Val2, Val3});
4525
4526 return DAG.getNode(
4527 ISD::BITCAST, DL, MVT::v4i8,
4528 SDValue(
4529 DAG.getMachineNode(
4530 RISCV::PACK, DL, MVT::i32,
4531 {DAG.getNode(ISD::BITCAST, DL, MVT::i32, PackDH.getValue(0)),
4532 DAG.getNode(ISD::BITCAST, DL, MVT::i32, PackDH.getValue(1))}),
4533 0));
4534 }
4535
4536 // Proper support for f16 requires Zvfh. bf16 always requires special
4537 // handling. We need to cast the scalar to integer and create an integer
4538 // build_vector.
4539 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4540 MVT IVT = VT.changeVectorElementType(MVT::i16);
4541 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4542 for (const auto &[I, U] : enumerate(Op->ops())) {
4543 SDValue Elem = U.get();
4544 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4545 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4546 // Called by LegalizeDAG, we need to use XLenVT operations since we
4547 // can't create illegal types.
4548 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4549 // Manually constant fold so the integer build_vector can be lowered
4550 // better. Waiting for DAGCombine will be too late.
4551 APInt V =
4552 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4553 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4554 } else {
4555 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4556 }
4557 } else {
4558 // Called by scalar type legalizer, we can use i16.
4559 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4560 }
4561 }
4562 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4563 return DAG.getBitcast(VT, Res);
4564 }
4565
4566 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4567 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4568 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4569
4570 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4571
4572 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4573
4574 if (VT.getVectorElementType() == MVT::i1) {
4575 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4576 // vector type, we have a legal equivalently-sized i8 type, so we can use
4577 // that.
4578 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4579 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4580
4581 SDValue WideVec;
4582 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4583 // For a splat, perform a scalar truncate before creating the wider
4584 // vector.
4585 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4586 DAG.getConstant(1, DL, Splat.getValueType()));
4587 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4588 } else {
4589 SmallVector<SDValue, 8> Ops(Op->op_values());
4590 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4591 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4592 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4593 }
4594
4595 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4596 }
4597
4598 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4599 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4600 return Gather;
4601
4602 // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
4603 // pressure at high LMUL.
4604 if (all_of(Op->ops().drop_front(),
4605 [](const SDUse &U) { return U.get().isUndef(); })) {
4606 unsigned Opc =
4607 VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
4608 if (!VT.isFloatingPoint())
4609 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4610 Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4611 Splat, VL);
4612 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4613 }
4614
4615 unsigned Opc =
4616 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
4617 if (!VT.isFloatingPoint())
4618 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4619 Splat =
4620 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4621 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4622 }
4623
4624 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4625 return Res;
4626
4627 // If we're compiling for an exact VLEN value, we can split our work per
4628 // register in the register group.
4629 if (const auto VLen = Subtarget.getRealVLen();
4630 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4631 MVT ElemVT = VT.getVectorElementType();
4632 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4633 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4634 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4635 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4636 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
4637
4638 // The following semantically builds up a fixed length concat_vector
4639 // of the component build_vectors. We eagerly lower to scalable and
4640 // insert_subvector here to avoid DAG combining it back to a large
4641 // build_vector.
4642 SmallVector<SDValue> BuildVectorOps(Op->ops());
4643 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4644 SDValue Vec = DAG.getUNDEF(ContainerVT);
4645 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4646 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4647 SDValue SubBV =
4648 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4649 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4650 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4651 Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
4652 }
4653 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4654 }
4655
4656 // If we're about to resort to vslide1down (or stack usage), pack our
4657 // elements into the widest scalar type we can. This will force a VL/VTYPE
4658 // toggle, but reduces the critical path, the number of vslide1down ops
4659 // required, and possibly enables scalar folds of the values.
4660 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4661 return Res;
4662
4663 // For m1 vectors, if we have non-undef values in both halves of our vector,
4664 // split the vector into low and high halves, build them separately, then
4665 // use a vselect to combine them. For long vectors, this cuts the critical
4666 // path of the vslide1down sequence in half, and gives us an opportunity
4667 // to special case each half independently. Note that we don't change the
4668 // length of the sub-vectors here, so if both fallback to the generic
4669 // vslide1down path, we should be able to fold the vselect into the final
4670 // vslidedown (for the undef tail) for the first half w/ masking.
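// For example, a fully-defined 8 element build_vector is rebuilt as
// <a0,a1,a2,a3,undef,...> and <undef,...,a4,a5,a6,a7>, combined with a
// vselect whose mask is <1,1,1,1,0,0,0,0>.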
4671 unsigned NumElts = VT.getVectorNumElements();
4672 unsigned NumUndefElts =
4673 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4674 unsigned NumDefElts = NumElts - NumUndefElts;
4675 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4676 ContainerVT.bitsLE(RISCVTargetLowering::getM1VT(ContainerVT))) {
4677 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4678 SmallVector<SDValue> MaskVals;
4679 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4680 SubVecAOps.reserve(NumElts);
4681 SubVecBOps.reserve(NumElts);
4682 for (const auto &[Idx, U] : enumerate(Op->ops())) {
4683 SDValue Elem = U.get();
4684 if (Idx < NumElts / 2) {
4685 SubVecAOps.push_back(Elem);
4686 SubVecBOps.push_back(UndefElem);
4687 } else {
4688 SubVecAOps.push_back(UndefElem);
4689 SubVecBOps.push_back(Elem);
4690 }
4691 bool SelectMaskVal = (Idx < NumElts / 2);
4692 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4693 }
4694 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4695 MaskVals.size() == NumElts);
4696
4697 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4698 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4699 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4700 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4701 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4702 }
4703
4704 // Cap the cost at a value linear to the number of elements in the vector.
4705 // The default lowering is to use the stack. The vector store + scalar loads
4706 // is linear in VL. However, at high LMULs vslide1down and vslidedown end up
4707 // being (at least) linear in LMUL. As a result, using the vslidedown
4708 // lowering for every element ends up being VL*LMUL.
4709 // TODO: Should we be directly costing the stack alternative? Doing so might
4710 // give us a more accurate upper bound.
4711 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4712
4713 // TODO: unify with TTI getSlideCost.
4714 InstructionCost PerSlideCost = 1;
4715 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4716 default: break;
4717 case RISCVVType::LMUL_2:
4718 PerSlideCost = 2;
4719 break;
4720 case RISCVVType::LMUL_4:
4721 PerSlideCost = 4;
4722 break;
4723 case RISCVVType::LMUL_8:
4724 PerSlideCost = 8;
4725 break;
4726 }
4727
4728 // TODO: Should we be using the build instseq then cost + evaluate scheme
4729 // we use for integer constants here?
4730 unsigned UndefCount = 0;
4731 for (const SDValue &V : Op->ops()) {
4732 if (V.isUndef()) {
4733 UndefCount++;
4734 continue;
4735 }
4736 if (UndefCount) {
4737 LinearBudget -= PerSlideCost;
4738 UndefCount = 0;
4739 }
4740 LinearBudget -= PerSlideCost;
4741 }
4742 if (UndefCount) {
4743 LinearBudget -= PerSlideCost;
4744 }
4745
4746 if (LinearBudget < 0)
4747 return SDValue();
4748
4749 assert((!VT.isFloatingPoint() ||
4750 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4751 "Illegal type which will result in reserved encoding");
4752
4753 const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
4754
4755 // General case: splat the first operand and slide other operands down one
4756 // by one to form a vector. Alternatively, if every operand is an
4757 // extraction from element 0 of a vector, we use that vector from the last
4758 // extraction as the start value and slide up instead of slide down, such that
4759 // (1) we can avoid the initial splat (2) we can turn those vslide1up into
4760 // vslideup of 1 later and eliminate the vector to scalar movement, which is
4761 // something we cannot do with vslide1down/vslidedown.
4762 // Of course, using vslide1up/vslideup might increase the register pressure,
4763 // and that's why we conservatively limit to cases where every operand is an
4764 // extraction from the first element.
4765 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
4766 SDValue EVec;
4767 bool SlideUp = false;
4768 auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
4769 SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
4770 if (SlideUp)
4771 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4772 Mask, VL, Policy);
4773 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
4774 Mask, VL, Policy);
4775 };
4776
4777 // The reason we don't use all_of here is because we're also capturing EVec
4778 // from the last non-undef operand. If the std::execution_policy of the
4779 // underlying std::all_of is anything but std::sequenced_policy we might
4780 // capture the wrong EVec.
4781 for (SDValue V : Operands) {
4782 using namespace SDPatternMatch;
4783 SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
4784 if (!SlideUp)
4785 break;
4786 }
4787
4788 // Do not slideup if the element type of EVec is different.
4789 if (SlideUp) {
4790 MVT EVecEltVT = EVec.getSimpleValueType().getVectorElementType();
4791 MVT ContainerEltVT = ContainerVT.getVectorElementType();
4792 if (EVecEltVT != ContainerEltVT)
4793 SlideUp = false;
4794 }
4795
4796 if (SlideUp) {
4797 MVT EVecContainerVT = EVec.getSimpleValueType();
4798 // Make sure the original vector has scalable vector type.
4799 if (EVecContainerVT.isFixedLengthVector()) {
4800 EVecContainerVT =
4801 getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
4802 EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
4803 }
4804
4805 // Adapt EVec's type into ContainerVT.
4806 if (EVecContainerVT.getVectorMinNumElements() <
4807 ContainerVT.getVectorMinNumElements())
4808 EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
4809 else
4810 EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
4811
4812 // Reverse the elements as we're going to slide up from the last element.
4813 std::reverse(Operands.begin(), Operands.end());
4814 }
4815
4816 SDValue Vec;
4817 UndefCount = 0;
4818 for (SDValue V : Operands) {
4819 if (V.isUndef()) {
4820 UndefCount++;
4821 continue;
4822 }
4823
4824 // Start our sequence with either a TA splat or extract source in the
4825 // hopes that hardware is able to recognize there's no dependency on the
4826 // prior value of our temporary register.
4827 if (!Vec) {
4828 if (SlideUp) {
4829 Vec = EVec;
4830 } else {
4831 Vec = DAG.getSplatVector(VT, DL, V);
4832 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4833 }
4834
4835 UndefCount = 0;
4836 continue;
4837 }
4838
4839 if (UndefCount) {
4840 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4841 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4842 VL);
4843 UndefCount = 0;
4844 }
4845
4846 unsigned Opcode;
4847 if (VT.isFloatingPoint())
4848 Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
4849 else
4850 Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
4851
4852 if (!VT.isFloatingPoint())
4853 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4854 Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4855 V, Mask, VL);
4856 }
4857 if (UndefCount) {
4858 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4859 Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
4860 VL);
4861 }
4862 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4863}
4864
4865static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4866 SDValue Lo, SDValue Hi, SDValue VL,
4867 SelectionDAG &DAG) {
4868 if (!Passthru)
4869 Passthru = DAG.getUNDEF(VT);
4870 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4871 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4872 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4873 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4874 // node in order to try and match RVV vector/scalar instructions.
4875 if ((LoC >> 31) == HiC)
4876 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4877
4878 // Use vmv.v.x with EEW=32. Use either a vsetivli or vsetvli to change
4879 // VL. This can temporarily increase VL if VL is less than VLMAX.
4880 if (LoC == HiC) {
4881 SDValue NewVL;
4882 if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4883 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4884 else
4885 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4886 MVT InterVT =
4887 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4888 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4889 DAG.getUNDEF(InterVT), Lo, NewVL);
4890 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4891 }
4892 }
4893
4894 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4895 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4896 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4897 Hi.getConstantOperandVal(1) == 31)
4898 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4899
4900 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4901 // even if it might be sign extended.
4902 if (Hi.isUndef())
4903 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4904
4905 // Fall back to a stack store and stride x0 vector load.
4906 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4907 Hi, VL);
4908}
4909
4910// Called by type legalization to handle splat of i64 on RV32.
4911// FIXME: We can optimize this when the type has sign or zero bits in one
4912// of the halves.
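// For example, splatting the i64 constant -1 on RV32 splits into Lo = Hi = -1;
// since Hi matches the sign bits of Lo, splatPartsI64WithVL emits a single
// vmv.v.x rather than the stack-based SPLAT_VECTOR_SPLIT_I64_VL fallback.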
4913static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4914 SDValue Scalar, SDValue VL,
4915 SelectionDAG &DAG) {
4916 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4917 SDValue Lo, Hi;
4918 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4919 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4920}
4921
4922// This function lowers a splat of a scalar operand Splat with the vector
4923// length VL. It ensures the final sequence is type legal, which is useful when
4924// lowering a splat after type legalization.
4925static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4926 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4927 const RISCVSubtarget &Subtarget) {
4928 bool HasPassthru = Passthru && !Passthru.isUndef();
4929 if (!HasPassthru && !Passthru)
4930 Passthru = DAG.getUNDEF(VT);
4931
4932 MVT EltVT = VT.getVectorElementType();
4933 MVT XLenVT = Subtarget.getXLenVT();
4934
4935 if (VT.isFloatingPoint()) {
4936 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4937 (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
4938 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4939 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4940 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4941 else
4942 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4943 MVT IVT = VT.changeVectorElementType(MVT::i16);
4944 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4945 SDValue Splat =
4946 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4947 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4948 }
4949 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4950 }
4951
4952 // Simplest case is that the operand needs to be promoted to XLenVT.
4953 if (Scalar.getValueType().bitsLE(XLenVT)) {
4954 // If the operand is a constant, sign extend to increase our chances
4955 // of being able to use a .vi instruction. ANY_EXTEND would become a
4956 // zero extend and the simm5 check in isel would fail.
4957 // FIXME: Should we ignore the upper bits in isel instead?
4958 unsigned ExtOpc =
4959 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4960 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4961 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4962 }
4963
4964 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4965 "Unexpected scalar for splat lowering!");
4966
4967 if (isOneConstant(VL) && isNullConstant(Scalar))
4968 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4969 DAG.getConstant(0, DL, XLenVT), VL);
4970
4971 // Otherwise use the more complicated splatting algorithm.
4972 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4973}
4974
4975// This function lowers an insert of a scalar operand Scalar into lane
4976// 0 of the vector regardless of the value of VL. The contents of the
4977// remaining lanes of the result vector are unspecified. VL is assumed
4978// to be non-zero.
4979static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4980 const SDLoc &DL, SelectionDAG &DAG,
4981 const RISCVSubtarget &Subtarget) {
4982 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4983
4984 const MVT XLenVT = Subtarget.getXLenVT();
4985 SDValue Passthru = DAG.getUNDEF(VT);
4986
4987 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4988 isNullConstant(Scalar.getOperand(1))) {
4989 SDValue ExtractedVal = Scalar.getOperand(0);
4990 // The element types must be the same.
4991 if (ExtractedVal.getValueType().getVectorElementType() ==
4992 VT.getVectorElementType()) {
4993 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4994 MVT ExtractedContainerVT = ExtractedVT;
4995 if (ExtractedContainerVT.isFixedLengthVector()) {
4996 ExtractedContainerVT = getContainerForFixedLengthVector(
4997 DAG, ExtractedContainerVT, Subtarget);
4998 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4999 ExtractedVal, DAG, Subtarget);
5000 }
5001 if (ExtractedContainerVT.bitsLE(VT))
5002 return DAG.getInsertSubvector(DL, Passthru, ExtractedVal, 0);
5003 return DAG.getExtractSubvector(DL, VT, ExtractedVal, 0);
5004 }
5005 }
5006
5007 if (VT.isFloatingPoint())
5008 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
5009 VL);
5010
5011 // Avoid the tricky legalization cases by falling back to using the
5012 // splat code which already handles it gracefully.
5013 if (!Scalar.getValueType().bitsLE(XLenVT))
5014 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
5015 DAG.getConstant(1, DL, XLenVT),
5016 VT, DL, DAG, Subtarget);
5017
5018 // If the operand is a constant, sign extend to increase our chances
5019 // of being able to use a .vi instruction. ANY_EXTEND would become a
5020 // zero extend and the simm5 check in isel would fail.
5021 // FIXME: Should we ignore the upper bits in isel instead?
5022 unsigned ExtOpc =
5023 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
5024 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
5025 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
5026 VL);
5027}
5028
5029/// If concat_vector(V1,V2) could be folded away to some existing
5030/// vector source, return it. Note that the source may be larger
5031 /// than the requested concat_vector (i.e. an extract_subvector
5032/// might be required.)
5033static SDValue foldConcatVector(SDValue V1, SDValue V2) {
5034 EVT VT = V1.getValueType();
5035 assert(VT == V2.getValueType() && "argument types must match");
5036 // Both input must be extracts.
5037 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
5038 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
5039 return SDValue();
5040
5041 // Extracting from the same source.
5042 SDValue Src = V1.getOperand(0);
5043 if (Src != V2.getOperand(0) ||
5044 VT.isScalableVector() != Src.getValueType().isScalableVector())
5045 return SDValue();
5046
5047 // The extracts must extract the two halves of the source.
5048 if (V1.getConstantOperandVal(1) != 0 ||
5049 V2.getConstantOperandVal(1) != VT.getVectorMinNumElements())
5050 return SDValue();
5051
5052 return Src;
5053}
5054
5055// Can this shuffle be performed on exactly one (possibly larger) input?
5056static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
5057
5058 if (V2.isUndef())
5059 return V1;
5060
5061 unsigned NumElts = VT.getVectorNumElements();
5062 // Src needs to have twice the number of elements.
5063 // TODO: Update shuffle lowering to add the extract subvector
5064 if (SDValue Src = foldConcatVector(V1, V2);
5065 Src && Src.getValueType().getVectorNumElements() == (NumElts * 2))
5066 return Src;
5067
5068 return SDValue();
5069}
5070
5071/// Is this shuffle interleaving contiguous elements from one vector into the
5072/// even elements and contiguous elements from another vector into the odd
5073/// elements. \p EvenSrc will contain the element that should be in the first
5074/// even element. \p OddSrc will contain the element that should be in the first
5075/// odd element. These can be the first element in a source or the element half
5076/// way through the source.
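/// For example, with two v8i32 sources the mask <0, 8, 1, 9, 2, 10, 3, 11>
/// interleaves the low half of the first source (EvenSrc = 0) with the low
/// half of the second (OddSrc = 8).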
5077static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
5078 int &OddSrc, const RISCVSubtarget &Subtarget) {
5079 // We need to be able to widen elements to the next larger integer type or
5080 // use the zip2a instruction at e64.
5081 if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
5082 !Subtarget.hasVendorXRivosVizip())
5083 return false;
5084
5085 int Size = Mask.size();
5086 int NumElts = VT.getVectorNumElements();
5087 assert(Size == (int)NumElts && "Unexpected mask size");
5088
5089 SmallVector<unsigned, 2> StartIndexes;
5090 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
5091 return false;
5092
5093 EvenSrc = StartIndexes[0];
5094 OddSrc = StartIndexes[1];
5095
5096 // One source should be low half of first vector.
5097 if (EvenSrc != 0 && OddSrc != 0)
5098 return false;
5099
5100 // Subvectors will be subtracted from either at the start of the two input
5101 // vectors, or at the start and middle of the first vector if it's a unary
5102 // interleave.
5103 // In both cases, HalfNumElts will be extracted.
5104 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
5105 // we'll create an illegal extract_subvector.
5106 // FIXME: We could support other values using a slidedown first.
5107 int HalfNumElts = NumElts / 2;
5108 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
5109}
5110
5111/// Is this mask representing a masked combination of two slides?
5112static bool isMaskedSlidePair(ArrayRef<int> Mask,
5113 std::array<std::pair<int, int>, 2> &SrcInfo) {
5114 if (!llvm::isMaskedSlidePair(Mask, Mask.size(), SrcInfo))
5115 return false;
5116
5117 // Avoid matching vselect idioms
5118 if (SrcInfo[0].second == 0 && SrcInfo[1].second == 0)
5119 return false;
5120 // Prefer vslideup as the second instruction, and identity
5121 // only as the initial instruction.
5122 if ((SrcInfo[0].second > 0 && SrcInfo[1].second < 0) ||
5123 SrcInfo[1].second == 0)
5124 std::swap(SrcInfo[0], SrcInfo[1]);
5125 assert(SrcInfo[0].first != -1 && "Must find one slide");
5126 return true;
5127}
5128
5129// Exactly matches the semantics of a previously existing custom matcher
5130// to allow migration to new matcher without changing output.
5131static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
5132 unsigned NumElts) {
5133 if (SrcInfo[1].first == -1)
5134 return true;
5135 return SrcInfo[0].second < 0 && SrcInfo[1].second > 0 &&
5136 SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
5137}
5138
5139static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
5140 ArrayRef<int> Mask, unsigned Factor,
5141 bool RequiredPolarity) {
5142 int NumElts = Mask.size();
5143 for (const auto &[Idx, M] : enumerate(Mask)) {
5144 if (M < 0)
5145 continue;
5146 int Src = M >= NumElts;
5147 int Diff = (int)Idx - (M % NumElts);
5148 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
5149 assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
5150 "Must match exactly one of the two slides");
5151 if (RequiredPolarity != (C == (Idx / Factor) % 2))
5152 return false;
5153 }
5154 return true;
5155}
5156
5157/// Given a shuffle which can be represented as a pair of two slides,
5158/// see if it is a zipeven idiom. Zipeven is:
5159/// vs2: a0 a1 a2 a3
5160/// vs1: b0 b1 b2 b3
5161/// vd: a0 b0 a2 b2
5162static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
5163 ArrayRef<int> Mask, unsigned &Factor) {
5164 Factor = SrcInfo[1].second;
5165 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
5166 Mask.size() % Factor == 0 &&
5167 isAlternating(SrcInfo, Mask, Factor, true);
5168}
5169
5170/// Given a shuffle which can be represented as a pair of two slides,
5171/// see if it is a zipodd idiom. Zipodd is:
5172/// vs2: a0 a1 a2 a3
5173/// vs1: b0 b1 b2 b3
5174/// vd: a1 b1 a3 b3
5175/// Note that the operand order is swapped due to the way we canonicalize
5176 /// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
5177static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
5178 ArrayRef<int> Mask, unsigned &Factor) {
5179 Factor = -SrcInfo[1].second;
5180 return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
5181 Mask.size() % Factor == 0 &&
5182 isAlternating(SrcInfo, Mask, Factor, false);
5183}
5184
5185// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
5186// 2, 4, 8 and the integer type Factor-times larger than VT's
5187// element type must be a legal element type.
5188// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
5189// -> [p, q, r, s] (Factor=2, Index=1)
5190static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
5191 SDValue Src, unsigned Factor,
5192 unsigned Index, SelectionDAG &DAG) {
5193 unsigned EltBits = VT.getScalarSizeInBits();
5194 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
5195 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
5196 SrcEC.divideCoefficientBy(Factor));
5197 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
5198 SrcEC.divideCoefficientBy(Factor));
5199 Src = DAG.getBitcast(WideSrcVT, Src);
5200
5201 unsigned Shift = Index * EltBits;
5202 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
5203 DAG.getConstant(Shift, DL, WideSrcVT));
5204 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
5205 MVT CastVT = ResVT.changeVectorElementType(VT.getVectorElementType());
5206 Res = DAG.getBitcast(CastVT, Res);
5207 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
5208}
5209
5210/// Match a single source shuffle which is an identity except that some
5211/// particular element is repeated. This can be lowered as a masked
5212/// vrgather.vi/vx. Note that the two source form of this is handled
5213/// by the recursive splitting logic and doesn't need special handling.
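/// For example, the single-source mask <0, 1, 2, 2> is an identity except that
/// element 2 also lands in lane 3; it is matched with SplatIdx = 2 and lowered
/// as a select between a splat of element 2 and the unmodified source.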
5215 const RISCVSubtarget &Subtarget,
5216 SelectionDAG &DAG) {
5217
5218 SDLoc DL(SVN);
5219 MVT VT = SVN->getSimpleValueType(0);
5220 SDValue V1 = SVN->getOperand(0);
5221 assert(SVN->getOperand(1).isUndef());
5222 ArrayRef<int> Mask = SVN->getMask();
5223 const unsigned NumElts = VT.getVectorNumElements();
5224 MVT XLenVT = Subtarget.getXLenVT();
5225
5226 std::optional<int> SplatIdx;
5227 for (auto [I, M] : enumerate(Mask)) {
5228 if (M == -1 || I == (unsigned)M)
5229 continue;
5230 if (SplatIdx && *SplatIdx != M)
5231 return SDValue();
5232 SplatIdx = M;
5233 }
5234
5235 if (!SplatIdx)
5236 return SDValue();
5237
5238 SmallVector<SDValue> MaskVals;
5239 for (int MaskIndex : Mask) {
5240 bool SelectMaskVal = MaskIndex == *SplatIdx;
5241 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5242 }
5243 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5244 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5245 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5246 SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
5247 SmallVector<int>(NumElts, *SplatIdx));
5248 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
5249}
5250
5251// Lower the following shuffle to vslidedown.
5252// a)
5253// t49: v8i8 = extract_subvector t13, Constant:i64<0>
5254// t109: v8i8 = extract_subvector t13, Constant:i64<8>
5255// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
5256// b)
5257// t69: v16i16 = extract_subvector t68, Constant:i64<0>
5258// t23: v8i16 = extract_subvector t69, Constant:i64<0>
5259// t29: v4i16 = extract_subvector t23, Constant:i64<4>
5260// t26: v8i16 = extract_subvector t69, Constant:i64<8>
5261// t30: v4i16 = extract_subvector t26, Constant:i64<0>
5262// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
5263static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
5264 SDValue V1, SDValue V2,
5265 ArrayRef<int> Mask,
5266 const RISCVSubtarget &Subtarget,
5267 SelectionDAG &DAG) {
5268 auto findNonEXTRACT_SUBVECTORParent =
5269 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
5270 uint64_t Offset = 0;
5271 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5272 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
5273 // a scalable vector. But we don't want to match the case.
5274 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
5275 Offset += Parent.getConstantOperandVal(1);
5276 Parent = Parent.getOperand(0);
5277 }
5278 return std::make_pair(Parent, Offset);
5279 };
5280
5281 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
5282 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
5283
5284 // Extracting from the same source.
5285 SDValue Src = V1Src;
5286 if (Src != V2Src)
5287 return SDValue();
5288
5289 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
5290 SmallVector<int, 16> NewMask(Mask);
5291 for (size_t i = 0; i != NewMask.size(); ++i) {
5292 if (NewMask[i] == -1)
5293 continue;
5294
5295 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
5296 NewMask[i] = NewMask[i] + V1IndexOffset;
5297 } else {
5298 // Minus NewMask.size() is needed. Otherwise, the b case would be
5299 // <5,6,7,12> instead of <5,6,7,8>.
5300 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
5301 }
5302 }
5303
5304 // First index must be known and non-zero. It will be used as the slidedown
5305 // amount.
5306 if (NewMask[0] <= 0)
5307 return SDValue();
5308
5309 // NewMask must also be contiguous (consecutive indices).
5310 for (unsigned i = 1; i != NewMask.size(); ++i)
5311 if (NewMask[i - 1] + 1 != NewMask[i])
5312 return SDValue();
5313
5314 MVT XLenVT = Subtarget.getXLenVT();
5315 MVT SrcVT = Src.getSimpleValueType();
5316 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
5317 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
5318 SDValue Slidedown =
5319 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
5320 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
5321 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
5322 return DAG.getExtractSubvector(
5323 DL, VT, convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget), 0);
5324}
5325
5326// Because vslideup leaves the destination elements at the start intact, we can
5327// use it to perform shuffles that insert subvectors:
5328//
5329// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
5330// ->
5331// vsetvli zero, 8, e8, mf2, ta, ma
5332// vslideup.vi v8, v9, 4
5333//
5334// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
5335// ->
5336// vsetvli zero, 5, e8, mf2, tu, ma
5337// vslideup.vi v8, v9, 2
5338static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
5339 SDValue V1, SDValue V2,
5340 ArrayRef<int> Mask,
5341 const RISCVSubtarget &Subtarget,
5342 SelectionDAG &DAG) {
5343 unsigned NumElts = VT.getVectorNumElements();
5344 int NumSubElts, Index;
5345 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
5346 Index))
5347 return SDValue();
5348
5349 bool OpsSwapped = Mask[Index] < (int)NumElts;
5350 SDValue InPlace = OpsSwapped ? V2 : V1;
5351 SDValue ToInsert = OpsSwapped ? V1 : V2;
5352
5353 MVT XLenVT = Subtarget.getXLenVT();
5354 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5355 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
5356 // We slide up by the index that the subvector is being inserted at, and set
5357 // VL to the index + the number of elements being inserted.
5358 unsigned Policy =
5359 RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
5360 // If we're adding a suffix to the in-place vector, i.e. inserting right
5361 // up to the very end of it, then we don't actually care about the tail.
5362 if (NumSubElts + Index >= (int)NumElts)
5363 Policy |= RISCVVType::TAIL_AGNOSTIC;
5364
5365 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
5366 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
5367 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
5368
5369 SDValue Res;
5370 // If we're inserting into the lowest elements, use a tail undisturbed
5371 // vmv.v.v.
5372 if (Index == 0)
5373 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
5374 VL);
5375 else
5376 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
5377 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
5378 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5379}
5380
5381/// Match v(f)slide1up/down idioms. These operations involve sliding
5382/// N-1 elements to make room for an inserted scalar at one end.
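/// For example, with t1 = splat(x) as the first operand and t0 as the second:
///   vector_shuffle t1:v4i32, t0:v4i32, <0, 4, 5, 6> -> vslide1up.vx   ([x, t0[0], t0[1], t0[2]])
///   vector_shuffle t1:v4i32, t0:v4i32, <5, 6, 7, 0> -> vslide1down.vx ([t0[1], t0[2], t0[3], x])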
5383static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
5384 SDValue V1, SDValue V2,
5385 ArrayRef<int> Mask,
5386 const RISCVSubtarget &Subtarget,
5387 SelectionDAG &DAG) {
5388 bool OpsSwapped = false;
5389 if (!isa<BuildVectorSDNode>(V1)) {
5390 if (!isa<BuildVectorSDNode>(V2))
5391 return SDValue();
5392 std::swap(V1, V2);
5393 OpsSwapped = true;
5394 }
5395 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
5396 if (!Splat)
5397 return SDValue();
5398
5399 // Return true if the mask could describe a slide of Mask.size() - 1
5400 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
5401 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
5402 const unsigned S = (Offset > 0) ? 0 : -Offset;
5403 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
5404 for (unsigned i = S; i != E; ++i)
5405 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
5406 return false;
5407 return true;
5408 };
5409
5410 const unsigned NumElts = VT.getVectorNumElements();
5411 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
5412 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
5413 return SDValue();
5414
5415 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
5416 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
5417 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
5418 return SDValue();
5419
5420 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5421 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5422
5423 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
5424 // vslide1{down,up}.vx instead.
5425 if (VT.getVectorElementType() == MVT::bf16 ||
5426 (VT.getVectorElementType() == MVT::f16 &&
5427 !Subtarget.hasVInstructionsF16())) {
5428 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
5429 Splat =
5430 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
5431 V2 = DAG.getBitcast(
5432 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
5433 SDValue Vec = DAG.getNode(
5434 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
5435 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
5436 Vec = DAG.getBitcast(ContainerVT, Vec);
5437 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5438 }
5439
5440 auto OpCode = IsVSlidedown ?
5441 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
5442 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
5443 if (!VT.isFloatingPoint())
5444 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
5445 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
5446 DAG.getUNDEF(ContainerVT),
5447 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
5448 Splat, TrueMask, VL);
5449 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5450}
5451
5452/// Match a mask which "spreads" the leading elements of a vector evenly
5453/// across the result. Factor is the spread amount, and Index is the
5454/// offset applied (on success, Index < Factor). This is the inverse
5455/// of a deinterleave with the same Factor and Index. This is analogous
5456/// to an interleave, except that all but one lane is undef.
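/// For example, <0, -1, 1, -1, 2, -1, 3, -1> is a spread with Factor=2 and
/// Index=0, and <-1, -1, 0, -1, -1, -1, 1, -1> is a spread with Factor=4 and
/// Index=2.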
5457bool RISCVTargetLowering::isSpreadMask(ArrayRef<int> Mask, unsigned Factor,
5458 unsigned &Index) {
5459 SmallVector<bool> LaneIsUndef(Factor, true);
5460 for (unsigned i = 0; i < Mask.size(); i++)
5461 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
5462
5463 bool Found = false;
5464 for (unsigned i = 0; i < Factor; i++) {
5465 if (LaneIsUndef[i])
5466 continue;
5467 if (Found)
5468 return false;
5469 Index = i;
5470 Found = true;
5471 }
5472 if (!Found)
5473 return false;
5474
5475 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
5476 unsigned j = i * Factor + Index;
5477 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
5478 return false;
5479 }
5480 return true;
5481}
5482
5483static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
5484 const SDLoc &DL, SelectionDAG &DAG,
5485 const RISCVSubtarget &Subtarget) {
5486 assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
5487 RISCVISD::RI_VZIP2A_VL == Opc || RISCVISD::RI_VZIP2B_VL == Opc ||
5488 RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc);
5490
5491 MVT VT = Op0.getSimpleValueType();
5492 MVT IntVT = VT.changeVectorElementTypeToInteger();
5493 Op0 = DAG.getBitcast(IntVT, Op0);
5494 Op1 = DAG.getBitcast(IntVT, Op1);
5495
5496 MVT ContainerVT = IntVT;
5497 if (VT.isFixedLengthVector()) {
5498 ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
5499 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
5500 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
5501 }
5502
5503 MVT InnerVT = ContainerVT;
5504 auto [Mask, VL] = getDefaultVLOps(IntVT, InnerVT, DL, DAG, Subtarget);
5505 if (Op1.isUndef() &&
5506 ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
5507 (RISCVISD::RI_VUNZIP2A_VL == Opc || RISCVISD::RI_VUNZIP2B_VL == Opc)) {
5508 InnerVT = ContainerVT.getHalfNumVectorElementsVT();
5509 VL = DAG.getConstant(VT.getVectorNumElements() / 2, DL,
5510 Subtarget.getXLenVT());
5511 Mask = getAllOnesMask(InnerVT, VL, DL, DAG);
5512 unsigned HighIdx = InnerVT.getVectorElementCount().getKnownMinValue();
5513 Op1 = DAG.getExtractSubvector(DL, InnerVT, Op0, HighIdx);
5514 Op0 = DAG.getExtractSubvector(DL, InnerVT, Op0, 0);
5515 }
5516
5517 SDValue Passthru = DAG.getUNDEF(InnerVT);
5518 SDValue Res = DAG.getNode(Opc, DL, InnerVT, Op0, Op1, Passthru, Mask, VL);
5519 if (InnerVT.bitsLT(ContainerVT))
5520 Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Res, 0);
5521 if (IntVT.isFixedLengthVector())
5522 Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
5523 Res = DAG.getBitcast(VT, Res);
5524 return Res;
5525}
5526
5527// Given a vector a, b, c, d return a vector Factor times longer
5528// with Factor-1 undef's between elements. Ex:
5529// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
5530// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
5531static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
5532 const SDLoc &DL, SelectionDAG &DAG) {
5533
5534 MVT VT = V.getSimpleValueType();
5535 unsigned EltBits = VT.getScalarSizeInBits();
5536 ElementCount EC = VT.getVectorElementCount();
5537 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
5538
5539 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
5540
5541 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
5542 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
5543 // allow the SHL to fold away if Index is 0.
5544 if (Index != 0)
5545 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
5546 DAG.getConstant(EltBits * Index, DL, WideVT));
5547 // Make sure to use original element type
5548 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
5549 EC.multiplyCoefficientBy(Factor));
5550 return DAG.getBitcast(ResultVT, Result);
5551}
5552
5553// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
5554// to create an interleaved vector of <[vscale x] n*2 x ty>.
5555// This requires that the size of ty is less than the subtarget's maximum ELEN.
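// For example, EvenV = [a, b] and OddV = [x, y] with i32 elements produce the
// two i64 elements [x:a, y:b], which viewed as i32 (little-endian) is the
// interleaved result [a, x, b, y].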
5556static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
5557 const SDLoc &DL, SelectionDAG &DAG,
5558 const RISCVSubtarget &Subtarget) {
5559
5560 // FIXME: Not only does this optimize the code, it fixes some correctness
5561 // issues because MIR does not have freeze.
5562 if (EvenV.isUndef())
5563 return getWideningSpread(OddV, 2, 1, DL, DAG);
5564 if (OddV.isUndef())
5565 return getWideningSpread(EvenV, 2, 0, DL, DAG);
5566
5567 MVT VecVT = EvenV.getSimpleValueType();
5568 MVT VecContainerVT = VecVT; // <vscale x n x ty>
5569 // Convert fixed vectors to scalable if needed
5570 if (VecContainerVT.isFixedLengthVector()) {
5571 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
5572 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
5573 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
5574 }
5575
5576 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
5577
5578 // We're working with a vector of the same size as the resulting
5579 // interleaved vector, but with half the number of elements and
5580 // twice the SEW (Hence the restriction on not using the maximum
5581 // ELEN)
5582 MVT WideVT =
5583 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
5584 VecVT.getVectorElementCount());
5585 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
5586 if (WideContainerVT.isFixedLengthVector())
5587 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
5588
5589 // Bitcast the input vectors to integers in case they are FP
5590 VecContainerVT = VecContainerVT.changeTypeToInteger();
5591 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
5592 OddV = DAG.getBitcast(VecContainerVT, OddV);
5593
5594 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
5595 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
5596
5597 SDValue Interleaved;
5598 if (Subtarget.hasStdExtZvbb()) {
5599 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
5600 SDValue OffsetVec =
5601 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
5602 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
5603 OffsetVec, Passthru, Mask, VL);
5604 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
5605 Interleaved, EvenV, Passthru, Mask, VL);
5606 } else {
5607 // FIXME: We should freeze the odd vector here. We already handled the case
5608 // of provably undef/poison above.
5609
5610 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
5611 // vwaddu.vv
5612 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
5613 OddV, Passthru, Mask, VL);
5614
5615 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all-ones.
5616 SDValue AllOnesVec = DAG.getSplatVector(
5617 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
5618 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
5619 OddV, AllOnesVec, Passthru, Mask, VL);
5620
5621 // Add the two together so we get
5622 // (OddV * 0xff...ff) + (OddV + EvenV)
5623 // = (OddV * 0x100...00) + EvenV
5624 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
5625 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
5626 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
5627 Interleaved, OddsMul, Passthru, Mask, VL);
5628 }
5629
5630 // Bitcast from <vscale x n * ty*2> to <vscale x 2*n x ty>
5631 MVT ResultContainerVT = MVT::getVectorVT(
5632 VecVT.getVectorElementType(), // Make sure to use original type
5633 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
5634 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5635
5636 // Convert back to a fixed vector if needed
5637 MVT ResultVT =
5638 MVT::getVectorVT(VecVT.getVectorElementType(),
5639 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5640 if (ResultVT.isFixedLengthVector())
5641 Interleaved =
5642 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5643
5644 return Interleaved;
5645}
5646
5647// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5648// larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5649static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5650 SelectionDAG &DAG,
5651 const RISCVSubtarget &Subtarget) {
5652 SDLoc DL(SVN);
5653 MVT VT = SVN->getSimpleValueType(0);
5654 SDValue V = SVN->getOperand(0);
5655 unsigned NumElts = VT.getVectorNumElements();
5656
5657 assert(VT.getVectorElementType() == MVT::i1);
5658
5659 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5660 SVN->getMask().size()) ||
5661 !SVN->getOperand(1).isUndef())
5662 return SDValue();
5663
5664 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5665 EVT ViaVT = EVT::getVectorVT(
5666 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5667 EVT ViaBitVT =
5668 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5669
5670 // If we don't have zvbb or the larger element type > ELEN, the operation will
5671 // be illegal.
5672 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5673 ViaVT) ||
5674 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5675 return SDValue();
5676
5677 // If the bit vector doesn't fit exactly into the larger element type, we need
5678 // to insert it into the larger vector and then shift up the reversed bits
5679 // afterwards to get rid of the gap introduced.
5680 if (ViaEltSize > NumElts)
5681 V = DAG.getInsertSubvector(DL, DAG.getUNDEF(ViaBitVT), V, 0);
5682
5683 SDValue Res =
5684 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5685
5686 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5687 // element type.
5688 if (ViaEltSize > NumElts)
5689 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5690 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5691
5692 Res = DAG.getBitcast(ViaBitVT, Res);
5693
5694 if (ViaEltSize > NumElts)
5695 Res = DAG.getExtractSubvector(DL, VT, Res, 0);
5696 return Res;
5697}
5698
5699static bool isLegalBitRotate(ArrayRef<int> Mask, EVT VT,
5700 const RISCVSubtarget &Subtarget,
5701 MVT &RotateVT, unsigned &RotateAmt) {
5702 unsigned NumElts = VT.getVectorNumElements();
5703 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5704 unsigned NumSubElts;
5705 if (!ShuffleVectorInst::isBitRotateMask(Mask, EltSizeInBits, 2,
5706 NumElts, NumSubElts, RotateAmt))
5707 return false;
5708 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5709 NumElts / NumSubElts);
5710
5711 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5712 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5713}
5714
5715// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5716// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5717// as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5718static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5719 SelectionDAG &DAG,
5720 const RISCVSubtarget &Subtarget) {
5721 SDLoc DL(SVN);
5722
5723 EVT VT = SVN->getValueType(0);
5724 unsigned RotateAmt;
5725 MVT RotateVT;
5726 if (!isLegalBitRotate(SVN->getMask(), VT, Subtarget, RotateVT, RotateAmt))
5727 return SDValue();
5728
5729 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5730
5731 SDValue Rotate;
5732 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5733 // so canonicalize to vrev8.
5734 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5735 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5736 else
5737 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5738 DAG.getConstant(RotateAmt, DL, RotateVT));
5739
5740 return DAG.getBitcast(VT, Rotate);
5741}
5742
5743// If compiling with an exactly known VLEN, see if we can split a
5744// shuffle on m2 or larger into a small number of m1 sized shuffles
5745// which write each destination register exactly once.
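// For example, with VLEN=128 a v8i32 (m2) shuffle whose low result register
// reads only from one source register and whose high result register reads
// only from another can be emitted as two independent v4i32 (m1) shuffles
// plus whole-register subvector inserts.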
5746static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5747 SelectionDAG &DAG,
5748 const RISCVSubtarget &Subtarget) {
5749 SDLoc DL(SVN);
5750 MVT VT = SVN->getSimpleValueType(0);
5751 SDValue V1 = SVN->getOperand(0);
5752 SDValue V2 = SVN->getOperand(1);
5753 ArrayRef<int> Mask = SVN->getMask();
5754
5755 // If we don't know exact data layout, not much we can do. If this
5756 // is already m1 or smaller, no point in splitting further.
5757 const auto VLen = Subtarget.getRealVLen();
5758 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5759 return SDValue();
5760
5761 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5762 // expansion for.
5763 unsigned RotateAmt;
5764 MVT RotateVT;
5765 if (isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))
5766 return SDValue();
5767
5768 MVT ElemVT = VT.getVectorElementType();
5769 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5770
5771 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5772 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5773 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5774 assert(M1VT == RISCVTargetLowering::getM1VT(M1VT));
5775 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5776 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5777 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5778 unsigned NumOfDestRegs = NumElts / NumOpElts;
5779 // The following semantically builds up a fixed length concat_vector
5780 // of the component shuffle_vectors. We eagerly lower to scalable here
5781 // to avoid DAG combining it back to a large shuffle_vector again.
5782 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5783 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5784 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5785 Operands;
5786 processShuffleMasks(
5787 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5788 [&]() { Operands.emplace_back(); },
5789 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5790 Operands.emplace_back().emplace_back(SrcVecIdx, UINT_MAX,
5791 SmallVector<int>(SrcSubMask));
5792 },
5793 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5794 if (NewReg)
5795 Operands.emplace_back();
5796 Operands.back().emplace_back(Idx1, Idx2, SmallVector<int>(SrcSubMask));
5797 });
5798 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5799 // Note: check that we do not emit too many shuffles here to prevent code
5800 // size explosion.
5801 // TODO: Investigate whether this can be improved by extra analysis of the
5802 // masks to check if the resulting code is more profitable.
5803 unsigned NumShuffles = std::accumulate(
5804 Operands.begin(), Operands.end(), 0u,
5805 [&](unsigned N,
5806 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5807 if (Data.empty())
5808 return N;
5809 N += Data.size();
5810 for (const auto &P : Data) {
5811 unsigned Idx2 = std::get<1>(P);
5812 ArrayRef<int> Mask = std::get<2>(P);
5813 if (Idx2 != UINT_MAX)
5814 ++N;
5815 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5816 --N;
5817 }
5818 return N;
5819 });
5820 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5821 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5822 return SDValue();
5823 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5824 SDValue SubVec = DAG.getExtractSubvector(DL, M1VT, SrcVec, ExtractIdx);
5825 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5826 return SubVec;
5827 };
5828 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5829 ArrayRef<int> Mask) {
5830 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5831 return SubVec;
5832 };
5833 SDValue Vec = DAG.getUNDEF(ContainerVT);
5834 for (auto [I, Data] : enumerate(Operands)) {
5835 if (Data.empty())
5836 continue;
5837 SmallDenseMap<unsigned, SDValue, 4> Values;
5838 for (unsigned I : seq<unsigned>(Data.size())) {
5839 const auto &[Idx1, Idx2, _] = Data[I];
5840 // If the shuffle contains a permutation of an odd number of elements,
5841 // Idx1 might already be used in the first iteration.
5842 //
5843 // Idx1 = shuffle Idx1, Idx2
5844 // Idx1 = shuffle Idx1, Idx3
5845 SDValue &V = Values.try_emplace(Idx1).first->getSecond();
5846 if (!V)
5847 V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5848 (Idx1 % NumOfSrcRegs) * NumOpElts);
5849 if (Idx2 != UINT_MAX) {
5850 SDValue &V = Values.try_emplace(Idx2).first->getSecond();
5851 if (!V)
5852 V = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5853 (Idx2 % NumOfSrcRegs) * NumOpElts);
5854 }
5855 }
5856 SDValue V;
5857 for (const auto &[Idx1, Idx2, Mask] : Data) {
5858 SDValue V1 = Values.at(Idx1);
5859 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5860 V = PerformShuffle(V1, V2, Mask);
5861 Values[Idx1] = V;
5862 }
5863
5864 unsigned InsertIdx = I * NumOpElts;
5865 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5866 Vec = DAG.getInsertSubvector(DL, Vec, V, InsertIdx);
5867 }
5868 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5869}
5870
5871// Matches a subset of compress masks with a contiguous prefix of output
5872// elements. This could be extended to allow gaps by deciding which
5873// source elements to spuriously demand.
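// For example, <0, 2, 5, -1> is accepted (keep elements 0, 2 and 5), while
// <0, -1, 3, -1> is not, because a defined element follows an undef one.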
5874static bool isCompressMask(ArrayRef<int> Mask) {
5875 int Last = -1;
5876 bool SawUndef = false;
5877 for (const auto &[Idx, M] : enumerate(Mask)) {
5878 if (M == -1) {
5879 SawUndef = true;
5880 continue;
5881 }
5882 if (SawUndef)
5883 return false;
5884 if (Idx > (unsigned)M)
5885 return false;
5886 if (M <= Last)
5887 return false;
5888 Last = M;
5889 }
5890 return true;
5891}
5892
5893/// Given a shuffle where the indices are disjoint between the two sources,
5894/// e.g.:
5895///
5896/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5897///
5898/// Merge the two sources into one and do a single source shuffle:
5899///
5900/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5901/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5902///
5903/// A vselect will either be merged into a masked instruction or be lowered as a
5904/// vmerge.vvm, which is cheaper than a vrgather.vv.
5905static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5906 SelectionDAG &DAG,
5907 const RISCVSubtarget &Subtarget) {
5908 MVT VT = SVN->getSimpleValueType(0);
5909 MVT XLenVT = Subtarget.getXLenVT();
5910 SDLoc DL(SVN);
5911
5912 const ArrayRef<int> Mask = SVN->getMask();
5913
5914 // Work out which source each lane will come from.
5915 SmallVector<int, 16> Srcs(Mask.size(), -1);
5916
5917 for (int Idx : Mask) {
5918 if (Idx == -1)
5919 continue;
5920 unsigned SrcIdx = Idx % Mask.size();
5921 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5922 if (Srcs[SrcIdx] == -1)
5923 // Mark this source as using this lane.
5924 Srcs[SrcIdx] = Src;
5925 else if (Srcs[SrcIdx] != Src)
5926 // The other source is using this lane: not disjoint.
5927 return SDValue();
5928 }
5929
5930 SmallVector<SDValue> SelectMaskVals;
5931 for (int Lane : Srcs) {
5932 if (Lane == -1)
5933 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5934 else
5935 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5936 }
5937 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5938 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5939 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5940 SVN->getOperand(0), SVN->getOperand(1));
5941
5942 // Move all indices relative to the first source.
5943 SmallVector<int> NewMask(Mask.size());
5944 for (unsigned I = 0; I < Mask.size(); I++) {
5945 if (Mask[I] == -1)
5946 NewMask[I] = -1;
5947 else
5948 NewMask[I] = Mask[I] % Mask.size();
5949 }
5950
5951 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5952}
5953
5954/// Is this mask local (i.e. elements only move within their local span), and
5955/// repeating (that is, the same rearrangement is being done within each span)?
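/// For example, with Span=4 the mask <1, 0, 3, 2, 5, 4, 7, 6> is local and
/// repeating: each 4-element span swaps adjacent pairs using only its own
/// elements.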
5956static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) {
5957 // Require a prefix from the original mask until the consumer code
5958 // is adjusted to rewrite the mask instead of just taking a prefix.
5959 for (auto [I, M] : enumerate(Mask)) {
5960 if (M == -1)
5961 continue;
5962 if ((M / Span) != (int)(I / Span))
5963 return false;
5964 int SpanIdx = I % Span;
5965 int Expected = M % Span;
5966 if (Mask[SpanIdx] != Expected)
5967 return false;
5968 }
5969 return true;
5970}
5971
5972/// Is this mask only using elements from the first span of the input?
5973static bool isLowSourceShuffle(ArrayRef<int> Mask, int Span) {
5974 return all_of(Mask, [&](const auto &Idx) { return Idx == -1 || Idx < Span; });
5975}
5976
5977/// Return true for a mask which performs an arbitrary shuffle within the first
5978/// span, and then repeats that same result across all remaining spans. Note
5979/// that this doesn't check if all the inputs come from a single span!
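/// For example, with Span=4 the mask <2, 0, 1, 3, 2, 0, 1, 3> qualifies:
/// every span produces the same result <2, 0, 1, 3>.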
5980static bool isSpanSplatShuffle(ArrayRef<int> Mask, int Span) {
5981 // Require a prefix from the original mask until the consumer code
5982 // is adjusted to rewrite the mask instead of just taking a prefix.
5983 for (auto [I, M] : enumerate(Mask)) {
5984 if (M == -1)
5985 continue;
5986 int SpanIdx = I % Span;
5987 if (Mask[SpanIdx] != M)
5988 return false;
5989 }
5990 return true;
5991}
5992
5993/// Try to widen element type to get a new mask value for a better permutation
5994/// sequence. This doesn't try to inspect the widened mask for profitability;
5995/// we speculate the widened form is equal or better. This has the effect of
5996/// reducing mask constant sizes - allowing cheaper materialization sequences
5997/// - and index sequence sizes - reducing register pressure and materialization
5998/// cost, at the cost of (possibly) an extra VTYPE toggle.
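/// For example, a v8i8 shuffle with mask <2, 3, 0, 1, 6, 7, 4, 5> can instead
/// be performed as a v4i16 shuffle with mask <1, 0, 3, 2>.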
5999static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
6000 SDLoc DL(Op);
6001 MVT VT = Op.getSimpleValueType();
6002 MVT ScalarVT = VT.getVectorElementType();
6003 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
6004 SDValue V0 = Op.getOperand(0);
6005 SDValue V1 = Op.getOperand(1);
6006 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
6007
6008 // Avoid wasted work leading to isTypeLegal check failing below
6009 if (ElementSize > 32)
6010 return SDValue();
6011
6012 SmallVector<int, 8> NewMask;
6013 if (!widenShuffleMaskElts(Mask, NewMask))
6014 return SDValue();
6015
6016 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
6017 : MVT::getIntegerVT(ElementSize * 2);
6018 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
6019 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
6020 return SDValue();
6021 V0 = DAG.getBitcast(NewVT, V0);
6022 V1 = DAG.getBitcast(NewVT, V1);
6023 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
6024}
6025
6026static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
6027 const RISCVSubtarget &Subtarget) {
6028 SDValue V1 = Op.getOperand(0);
6029 SDValue V2 = Op.getOperand(1);
6030 SDLoc DL(Op);
6031 MVT XLenVT = Subtarget.getXLenVT();
6032 MVT VT = Op.getSimpleValueType();
6033 unsigned NumElts = VT.getVectorNumElements();
6034 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6035
6036 if (VT.getVectorElementType() == MVT::i1) {
6037 // Lower to a vror.vi of a larger element type if possible before we promote
6038 // i1s to i8s.
6039 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6040 return V;
6041 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
6042 return V;
6043
6044 // Promote i1 shuffle to i8 shuffle.
6045 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
6046 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
6047 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
6048 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
6049 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
6050 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
6051 ISD::SETNE);
6052 }
6053
6054 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6055
6056 // Store the return value in a single variable instead of structured bindings
6057 // so that we can pass it to GetSlide below, which cannot capture structured
6058 // bindings until C++20.
6059 auto TrueMaskVL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6060 auto [TrueMask, VL] = TrueMaskVL;
6061
6062 if (SVN->isSplat()) {
6063 const int Lane = SVN->getSplatIndex();
6064 if (Lane >= 0) {
6065 MVT SVT = VT.getVectorElementType();
6066
6067 // Turn splatted vector load into a strided load with an X0 stride.
6068 SDValue V = V1;
6069 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
6070 // with undef.
6071 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
6072 int Offset = Lane;
6073 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
6074 int OpElements =
6075 V.getOperand(0).getSimpleValueType().getVectorNumElements();
6076 V = V.getOperand(Offset / OpElements);
6077 Offset %= OpElements;
6078 }
6079
6080 // We need to ensure the load isn't atomic or volatile.
6081 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
6082 auto *Ld = cast<LoadSDNode>(V);
6083 Offset *= SVT.getStoreSize();
6084 SDValue NewAddr = DAG.getMemBasePlusOffset(
6085 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
6086
6087 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
6088 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
6089 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
6090 SDValue IntID =
6091 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
6092 SDValue Ops[] = {Ld->getChain(),
6093 IntID,
6094 DAG.getUNDEF(ContainerVT),
6095 NewAddr,
6096 DAG.getRegister(RISCV::X0, XLenVT),
6097 VL};
6098 SDValue NewLoad = DAG.getMemIntrinsicNode(
6099 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
6100 DAG.getMachineFunction().getMachineMemOperand(
6101 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
6102 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
6103 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
6104 }
6105
6106 MVT SplatVT = ContainerVT;
6107
6108 // f16 with zvfhmin and bf16 need to use an integer scalar load.
6109 if (SVT == MVT::bf16 ||
6110 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
6111 SVT = MVT::i16;
6112 SplatVT = ContainerVT.changeVectorElementType(SVT);
6113 }
6114
6115 // Otherwise use a scalar load and splat. This will give the best
6116 // opportunity to fold a splat into the operation. ISel can turn it into
6117 // the x0 strided load if we aren't able to fold away the select.
6118 if (SVT.isFloatingPoint())
6119 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
6120 Ld->getPointerInfo().getWithOffset(Offset),
6121 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
6122 else
6123 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
6124 Ld->getPointerInfo().getWithOffset(Offset), SVT,
6125 Ld->getBaseAlign(),
6126 Ld->getMemOperand()->getFlags());
6127 DAG.makeEquivalentMemoryOrdering(Ld, V);
6128
6129 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
6130 : RISCVISD::VMV_V_X_VL;
6131 SDValue Splat =
6132 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
6133 Splat = DAG.getBitcast(ContainerVT, Splat);
6134 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
6135 }
6136
6137 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6138 assert(Lane < (int)NumElts && "Unexpected lane!");
6139 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
6140 V1, DAG.getConstant(Lane, DL, XLenVT),
6141 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6142 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6143 }
6144 }
6145
6146 // For exact VLEN m2 or greater, try to split to m1 operations if we
6147 // can split cleanly.
6148 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
6149 return V;
6150
6151 ArrayRef<int> Mask = SVN->getMask();
6152
6153 if (SDValue V =
6154 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
6155 return V;
6156
6157 if (SDValue V =
6158 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
6159 return V;
6160
6161 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
6162 // available.
6163 if (Subtarget.hasStdExtZvkb())
6164 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6165 return V;
6166
6167 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef() &&
6168 NumElts != 2)
6169 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
6170
6171 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
6172 // use shift and truncate to perform the shuffle.
6173 // TODO: For Factor=6, we can perform the first step of the deinterleave via
6174 // shift-and-trunc reducing total cost for everything except an mf8 result.
6175 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
6176 // to do the entire operation.
6177 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6178 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6179 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6180 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
6181 unsigned Index = 0;
6182 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
6183 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6184 if (SDValue Src = getSingleShuffleSrc(VT, V1, V2))
6185 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
6186 if (1 < count_if(Mask,
6187 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6188 1 < count_if(Mask, [&Mask](int Idx) {
6189 return Idx >= (int)Mask.size();
6190 })) {
6191 // Narrow each source and concatenate them.
6192 // FIXME: For small LMUL it is better to concatenate first.
6193 MVT EltVT = VT.getVectorElementType();
6194 auto EltCnt = VT.getVectorElementCount();
6195 MVT SubVT =
6196 MVT::getVectorVT(EltVT, EltCnt.divideCoefficientBy(Factor));
6197
6198 SDValue Lo =
6199 getDeinterleaveShiftAndTrunc(DL, SubVT, V1, Factor, Index, DAG);
6200 SDValue Hi =
6201 getDeinterleaveShiftAndTrunc(DL, SubVT, V2, Factor, Index, DAG);
6202
6203 SDValue Concat =
6204 DAG.getNode(ISD::CONCAT_VECTORS, DL,
6205 SubVT.getDoubleNumVectorElementsVT(), Lo, Hi);
6206 if (Factor == 2)
6207 return Concat;
6208
6209 SDValue Vec = DAG.getUNDEF(VT);
6210 return DAG.getInsertSubvector(DL, Vec, Concat, 0);
6211 }
6212 }
6213 }
6214 }
6215
6216 // If this is a deinterleave(2), try using vunzip{a,b}. This mostly catches
6217 // e64 which can't match above.
6218 unsigned Index = 0;
6219 if (Subtarget.hasVendorXRivosVizip() &&
6220 ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2, Index) &&
6221 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
6222 unsigned Opc =
6223 Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
6224 if (V2.isUndef())
6225 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6226 if (auto VLEN = Subtarget.getRealVLen();
6227 VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
6228 return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
6229 if (SDValue Src = foldConcatVector(V1, V2)) {
6230 EVT NewVT = VT.getDoubleNumVectorElementsVT();
6231 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
6232 SDValue Res =
6233 lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
6234 return DAG.getExtractSubvector(DL, VT, Res, 0);
6235 }
6236 // Deinterleave each source and concatenate them, or concat first, then
6237 // deinterleave.
6238 if (1 < count_if(Mask,
6239 [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
6240 1 < count_if(Mask,
6241 [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
6242
6243 const unsigned EltSize = VT.getScalarSizeInBits();
6244 const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
6245 if (NumElts < MinVLMAX) {
6246 MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
6247 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
6248 SDValue Res =
6249 lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
6250 return DAG.getExtractSubvector(DL, VT, Res, 0);
6251 }
6252
6253 SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6254 SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
6255
6256 MVT SubVT = VT.getHalfNumVectorElementsVT();
6257 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
6258 DAG.getExtractSubvector(DL, SubVT, Lo, 0),
6259 DAG.getExtractSubvector(DL, SubVT, Hi, 0));
6260 }
6261 }
6262
6263 if (SDValue V =
6264 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
6265 return V;
6266
6267 // Detect an interleave shuffle and lower to
6268 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
6269 int EvenSrc, OddSrc;
6270 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget) &&
6271 !(NumElts == 2 &&
6272 ShuffleVectorInst::isSingleSourceMask(Mask, Mask.size()))) {
6273 // Extract the halves of the vectors.
6274 MVT HalfVT = VT.getHalfNumVectorElementsVT();
6275
6276 // Recognize if one half is actually undef; the matching above will
6277 // otherwise reuse the even stream for the undef one. This improves
6278 // spread(2) shuffles.
6279 bool LaneIsUndef[2] = { true, true};
6280 for (const auto &[Idx, M] : enumerate(Mask))
6281 LaneIsUndef[Idx % 2] &= (M == -1);
6282
6283 int Size = Mask.size();
6284 SDValue EvenV, OddV;
6285 if (LaneIsUndef[0]) {
6286 EvenV = DAG.getUNDEF(HalfVT);
6287 } else {
6288 assert(EvenSrc >= 0 && "Undef source?");
6289 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
6290 EvenV = DAG.getExtractSubvector(DL, HalfVT, EvenV, EvenSrc % Size);
6291 }
6292
6293 if (LaneIsUndef[1]) {
6294 OddV = DAG.getUNDEF(HalfVT);
6295 } else {
6296 assert(OddSrc >= 0 && "Undef source?");
6297 OddV = (OddSrc / Size) == 0 ? V1 : V2;
6298 OddV = DAG.getExtractSubvector(DL, HalfVT, OddV, OddSrc % Size);
6299 }
6300
6301 // Prefer vzip2a if available.
6302 // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
6303 if (Subtarget.hasVendorXRivosVizip()) {
6304 EvenV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), EvenV, 0);
6305 OddV = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), OddV, 0);
6306 return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
6307 }
6308 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
6309 }
6310
6311 // Recognize a pattern which can be handled via a pair of vslideup/vslidedown
6312 // instructions (in any combination) with masking on the second instruction.
6313 // Also handles masked slides into an identity source, and single slides
6314 // without masking. Avoid matching bit rotates (which are not also element
6315 // rotates) as slide pairs. This is a performance heuristic, not a
6316 // functional check.
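  // For example, the v4 mask <1, 2, 3, 4> decomposes into a vslidedown of the
  // first source by 1 followed by a vslideup of the second source by 3 that is
  // masked to write only the last lane.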
6317 std::array<std::pair<int, int>, 2> SrcInfo;
6318 unsigned RotateAmt;
6319 MVT RotateVT;
6320 if (::isMaskedSlidePair(Mask, SrcInfo) &&
6321 (isElementRotate(SrcInfo, NumElts) ||
6322 !isLegalBitRotate(Mask, VT, Subtarget, RotateVT, RotateAmt))) {
6323 SDValue Sources[2];
6324 auto GetSourceFor = [&](const std::pair<int, int> &Info) {
6325 int SrcIdx = Info.first;
6326 assert(SrcIdx == 0 || SrcIdx == 1);
6327 SDValue &Src = Sources[SrcIdx];
6328 if (!Src) {
6329 SDValue SrcV = SrcIdx == 0 ? V1 : V2;
6330 Src = convertToScalableVector(ContainerVT, SrcV, DAG, Subtarget);
6331 }
6332 return Src;
6333 };
6334 auto GetSlide = [&](const std::pair<int, int> &Src, SDValue Mask,
6335 SDValue Passthru) {
6336 auto [TrueMask, VL] = TrueMaskVL;
6337 SDValue SrcV = GetSourceFor(Src);
6338 int SlideAmt = Src.second;
6339 if (SlideAmt == 0) {
6340 // Should never be second operation
6341 assert(Mask == TrueMask);
6342 return SrcV;
6343 }
6344 if (SlideAmt < 0)
6345 return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6346 DAG.getConstant(-SlideAmt, DL, XLenVT), Mask, VL,
6347 RISCVVType::TAIL_AGNOSTIC);
6348 return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, SrcV,
6349 DAG.getConstant(SlideAmt, DL, XLenVT), Mask, VL,
6350 RISCVVType::TAIL_AGNOSTIC);
6351 };
6352
6353 if (SrcInfo[1].first == -1) {
6354 SDValue Res = DAG.getUNDEF(ContainerVT);
6355 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6356 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6357 }
6358
6359 if (Subtarget.hasVendorXRivosVizip()) {
6360 bool TryWiden = false;
6361 unsigned Factor;
6362 if (isZipEven(SrcInfo, Mask, Factor)) {
6363 if (Factor == 1) {
6364 SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
6365 SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
6366 return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
6367 Subtarget);
6368 }
6369 TryWiden = true;
6370 }
6371 if (isZipOdd(SrcInfo, Mask, Factor)) {
6372 if (Factor == 1) {
6373 SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
6374 SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
6375 return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
6376 Subtarget);
6377 }
6378 TryWiden = true;
6379 }
6380 // If we found a widening opportunity which would let us form a
6381 // zipeven or zipodd, use the generic code to widen the shuffle
6382 // and recurse through this logic.
6383 if (TryWiden)
6384 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6385 return V;
6386 }
6387
6388 // Build the mask. Note that vslideup unconditionally preserves elements
6389 // below the slide amount in the destination, and thus those elements are
6390 // undefined in the mask. If the mask ends up all true (or undef), it
6391 // will be folded away by general logic.
6392 SmallVector<SDValue> MaskVals;
6393 for (const auto &[Idx, M] : enumerate(Mask)) {
6394 if (M < 0 ||
6395 (SrcInfo[1].second > 0 && Idx < (unsigned)SrcInfo[1].second)) {
6396 MaskVals.push_back(DAG.getUNDEF(XLenVT));
6397 continue;
6398 }
6399 int Src = M >= (int)NumElts;
6400 int Diff = (int)Idx - (M % NumElts);
6401 bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
6402 assert(C ^ (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
6403 "Must match exactly one of the two slides");
6404 MaskVals.push_back(DAG.getConstant(C, DL, XLenVT));
6405 }
6406 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6407 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6408 SDValue SelectMask = convertToScalableVector(
6409 ContainerVT.changeVectorElementType(MVT::i1),
6410 DAG.getBuildVector(MaskVT, DL, MaskVals), DAG, Subtarget);
6411
6412 SDValue Res = DAG.getUNDEF(ContainerVT);
6413 Res = GetSlide(SrcInfo[0], TrueMask, Res);
6414 Res = GetSlide(SrcInfo[1], SelectMask, Res);
6415 return convertFromScalableVector(VT, Res, DAG, Subtarget);
6416 }
6417
6418 // Handle any remaining single source shuffles
6419 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
6420 if (V2.isUndef()) {
6421 // We might be able to express the shuffle as a bitrotate. But even if we
6422 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
6423 // shifts and a vor will have a higher throughput than a vrgather.
6424 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
6425 return V;
6426
6427 if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
6428 return V;
6429
6430 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
6431 // is fully covered in interleave(2) above, so it is ignored here.
6432 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
6433 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
6434 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
6435 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
6436 unsigned Index;
6437 if (RISCVTargetLowering::isSpreadMask(Mask, Factor, Index)) {
6438 MVT NarrowVT =
6439 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
6440 SDValue Src = DAG.getExtractSubvector(DL, NarrowVT, V1, 0);
6441 return getWideningSpread(Src, Factor, Index, DL, DAG);
6442 }
6443 }
6444 }
6445
6446 // If only a prefix of the source elements influence a prefix of the
6447 // destination elements, try to see if we can reduce the required LMUL
6448 unsigned MinVLen = Subtarget.getRealMinVLen();
6449 unsigned MinVLMAX = MinVLen / VT.getScalarSizeInBits();
6450 if (NumElts > MinVLMAX) {
6451 unsigned MaxIdx = 0;
6452 for (auto [I, M] : enumerate(Mask)) {
6453 if (M == -1)
6454 continue;
6455 MaxIdx = std::max(std::max((unsigned)I, (unsigned)M), MaxIdx);
6456 }
6457 unsigned NewNumElts =
6458 std::max((uint64_t)MinVLMAX, PowerOf2Ceil(MaxIdx + 1));
6459 if (NewNumElts != NumElts) {
6460 MVT NewVT = MVT::getVectorVT(VT.getVectorElementType(), NewNumElts);
6461 V1 = DAG.getExtractSubvector(DL, NewVT, V1, 0);
6462 SDValue Res = DAG.getVectorShuffle(NewVT, DL, V1, DAG.getUNDEF(NewVT),
6463 Mask.take_front(NewNumElts));
6464 return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), Res, 0);
6465 }
6466 }
6467
6468 // Before hitting generic lowering fallbacks, try to widen the mask
6469 // to a wider SEW.
6470 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6471 return V;
6472
6473 // Can we generate a vcompress instead of a vrgather? These scale better
6474 // at high LMUL, at the cost of not being able to fold a following select
6475 // into them. The mask constants are also smaller than the index vector
6476 // constants, and thus easier to materialize.
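    // For example, the mask <0, 2, 5, -1, -1, -1, -1, -1> on a v8 source
    // becomes a VECTOR_COMPRESS with the v8i1 mask {1, 0, 1, 0, 0, 1, 0, 0}.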
6477 if (isCompressMask(Mask)) {
6478 SmallVector<SDValue> MaskVals(NumElts,
6479 DAG.getConstant(false, DL, XLenVT));
6480 for (auto Idx : Mask) {
6481 if (Idx == -1)
6482 break;
6483 assert(Idx >= 0 && (unsigned)Idx < NumElts);
6484 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
6485 }
6486 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6487 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6488 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
6489 DAG.getUNDEF(VT));
6490 }
6491
6492 if (VT.getScalarSizeInBits() == 8 &&
6493 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
6494 // On such a vector we're unable to use i8 as the index type.
6495 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
6496 // may involve vector splitting if we're already at LMUL=8, or our
6497 // user-supplied maximum fixed-length LMUL.
6498 return SDValue();
6499 }
6500
6501 // Base case for the two operand recursion below - handle the worst case
6502 // single source shuffle.
6503 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
6504 MVT IndexVT = VT.changeTypeToInteger();
6505 // Since we can't introduce illegal index types at this stage, use i16 and
6506 // vrgatherei16 if the corresponding index type for plain vrgather is greater
6507 // than XLenVT.
6508 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
6509 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6510 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6511 }
6512
6513 // If the mask allows, we can do all the index computation in 16 bits. This
6514 // requires less work and less register pressure at high LMUL, and creates
6515 // smaller constants which may be cheaper to materialize.
6516 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
6517 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
6518 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
6519 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
6520 }
6521
6522 MVT IndexContainerVT =
6523 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
6524
6525 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
6526 SmallVector<SDValue> GatherIndicesLHS;
6527 for (int MaskIndex : Mask) {
6528 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
6529 GatherIndicesLHS.push_back(IsLHSIndex
6530 ? DAG.getConstant(MaskIndex, DL, XLenVT)
6531 : DAG.getUNDEF(XLenVT));
6532 }
6533 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
6534 LHSIndices =
6535 convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
6536 // At m1 and less, there's no point trying any of the high LMUL splitting
6537 // techniques. TODO: Should we reconsider this for DLEN < VLEN?
6538 if (NumElts <= MinVLMAX) {
6539 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6540 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6541 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6542 }
6543
6544 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
6545 EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType());
6546 auto [InnerTrueMask, InnerVL] =
6547 getDefaultScalableVLOps(M1VT, DL, DAG, Subtarget);
6548 int N =
6549 ContainerVT.getVectorMinNumElements() / M1VT.getVectorMinNumElements();
6550 assert(isPowerOf2_32(N) && N <= 8);
6551
6552 // If we have a locally repeating mask, then we can reuse the first
6553 // register in the index register group for all registers within the
6554 // source register group. TODO: This generalizes to m2, and m4.
6555 if (isLocalRepeatingShuffle(Mask, MinVLMAX)) {
6556 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6557 SDValue Gather = DAG.getUNDEF(ContainerVT);
6558 for (int i = 0; i < N; i++) {
6559 unsigned SubIdx = M1VT.getVectorMinNumElements() * i;
6560 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, SubIdx);
6561 SDValue SubVec =
6562 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6563 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6564 Gather = DAG.getInsertSubvector(DL, Gather, SubVec, SubIdx);
6565 }
6566 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6567 }
6568
6569 // If we have a shuffle which only uses the first register in our source
6570 // register group, and repeats the same index across all spans, we can
6571 // use a single vrgather (and possibly some register moves).
6572 // TODO: This can be generalized for m2 or m4, or for any shuffle for
6573 // which we can do a linear number of shuffles to form an m1 which
6574 // contains all the output elements.
6575 if (isLowSourceShuffle(Mask, MinVLMAX) &&
6576 isSpanSplatShuffle(Mask, MinVLMAX)) {
6577 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6578 SDValue SubIndex = DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6579 SDValue SubVec = DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6580 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6581 SDValue Gather = DAG.getUNDEF(ContainerVT);
6582 for (int i = 0; i < N; i++)
6583 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6584 M1VT.getVectorMinNumElements() * i);
6585 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6586 }
6587
6588 // If we have a shuffle which only uses the first register in our
6589 // source register group, we can do a linear number of m1 vrgathers
6590 // reusing the same source register (but with different indices)
6591 // TODO: This can be generalized for m2 or m4, or for any shuffle
6592 // for which we can do a vslidedown followed by this expansion.
6593 if (isLowSourceShuffle(Mask, MinVLMAX)) {
6594 SDValue SlideAmt =
6595 DAG.getElementCount(DL, XLenVT, M1VT.getVectorElementCount());
6596 SDValue SubV1 = DAG.getExtractSubvector(DL, M1VT, V1, 0);
6597 SDValue Gather = DAG.getUNDEF(ContainerVT);
6598 for (int i = 0; i < N; i++) {
6599 if (i != 0)
6600 LHSIndices = getVSlidedown(DAG, Subtarget, DL, IndexContainerVT,
6601 DAG.getUNDEF(IndexContainerVT), LHSIndices,
6602 SlideAmt, TrueMask, VL);
6603 SDValue SubIndex =
6604 DAG.getExtractSubvector(DL, SubIndexVT, LHSIndices, 0);
6605 SDValue SubVec =
6606 DAG.getNode(GatherVVOpc, DL, M1VT, SubV1, SubIndex,
6607 DAG.getUNDEF(M1VT), InnerTrueMask, InnerVL);
6608 Gather = DAG.getInsertSubvector(DL, Gather, SubVec,
6609 M1VT.getVectorMinNumElements() * i);
6610 }
6611 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6612 }
6613
6614 // Fallback to generic vrgather if we can't find anything better.
6615 // On many machines, this will be O(LMUL^2)
6616 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
6617 DAG.getUNDEF(ContainerVT), TrueMask, VL);
6618 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
6619 }
6620
6621 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
6622 // merged with a second vrgather.
6623 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
6624
6625 // Now construct the mask that will be used by the blended vrgather operation.
6626 // Construct the appropriate indices into each vector.
6627 for (int MaskIndex : Mask) {
6628 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
6629 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
6630 ? MaskIndex : -1);
6631 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
6632 }
6633
6634 // If the mask indices are disjoint between the two sources, we can lower it
6635 // as a vselect + a single source vrgather.vv. Don't do this if we think the
6636 // operands may end up being lowered to something cheaper than a vrgather.vv.
6637 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
6638 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS) &&
6639 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS) &&
6640 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
6641 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
6642 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
6643 return V;
6644
6645 // Before hitting generic lowering fallbacks, try to widen the mask
6646 // to a wider SEW.
6647 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
6648 return V;
6649
6650 // Try to pick a profitable operand order.
6651 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
6652 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
6653
6654 // Recursively invoke lowering for each operand if we had two
6655 // independent single source shuffles, and then combine the result via a
6656 // vselect. Note that the vselect will likely be folded back into the
6657 // second permute (vrgather, or other) by the post-isel combine.
6658 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
6659 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
6660
6661 SmallVector<SDValue> MaskVals;
6662 for (int MaskIndex : Mask) {
6663 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
6664 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
6665 }
6666
6667 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
6668 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
6669 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
6670
6671 if (SwapOps)
6672 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
6673 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
6674}
6675
6676bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6677  // Only support legal VTs for other shuffles for now.
6678 if (!isTypeLegal(VT))
6679 return false;
6680
6681 // Support splats for any type. These should type legalize well.
6682  if (ShuffleVectorSDNode::isSplatMask(M))
6683    return true;
6684
6685 const unsigned NumElts = M.size();
6686 MVT SVT = VT.getSimpleVT();
6687
6688 // Not for i1 vectors.
6689 if (SVT.getScalarType() == MVT::i1)
6690 return false;
6691
6692 std::array<std::pair<int, int>, 2> SrcInfo;
6693 int Dummy1, Dummy2;
6694 return ShuffleVectorInst::isReverseMask(M, NumElts) ||
6695 (::isMaskedSlidePair(M, SrcInfo) &&
6696 isElementRotate(SrcInfo, NumElts)) ||
6697 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
6698}
6699
6700// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
6701// the exponent.
6702SDValue
6703RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
6704 SelectionDAG &DAG) const {
6705 MVT VT = Op.getSimpleValueType();
6706 unsigned EltSize = VT.getScalarSizeInBits();
6707 SDValue Src = Op.getOperand(0);
6708 SDLoc DL(Op);
6709 MVT ContainerVT = VT;
6710
6711 SDValue Mask, VL;
6712 if (Op->isVPOpcode()) {
6713 Mask = Op.getOperand(1);
6714 if (VT.isFixedLengthVector())
6715 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6716 Subtarget);
6717 VL = Op.getOperand(2);
6718 }
6719
6720  // We choose an FP type that can represent the value exactly if possible.
6721  // Otherwise, we use a round-towards-zero conversion, which still yields the correct exponent.
6722 // TODO: Use f16 for i8 when possible?
6723 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
6724 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
6725 FloatEltVT = MVT::f32;
6726 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
6727
6728 // Legal types should have been checked in the RISCVTargetLowering
6729 // constructor.
6730 // TODO: Splitting may make sense in some cases.
6731 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
6732 "Expected legal float type!");
6733
6734 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
6735 // The trailing zero count is equal to log2 of this single bit value.
6736 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
6737 SDValue Neg = DAG.getNegative(Src, DL, VT);
6738 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
6739 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
6740 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
6741 Src, Mask, VL);
6742 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
6743 }
6744
6745 // We have a legal FP type, convert to it.
6746 SDValue FloatVal;
6747 if (FloatVT.bitsGT(VT)) {
6748 if (Op->isVPOpcode())
6749 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
6750 else
6751 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
6752 } else {
6753    // Use RTZ so that rounding cannot change the exponent of FloatVal.
6754 if (VT.isFixedLengthVector()) {
6755 ContainerVT = getContainerForFixedLengthVector(VT);
6756 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
6757 }
6758 if (!Op->isVPOpcode())
6759 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6760 SDValue RTZRM =
6761 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
6762 MVT ContainerFloatVT =
6763 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
6764 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
6765 Src, Mask, RTZRM, VL);
6766 if (VT.isFixedLengthVector())
6767 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
6768 }
6769 // Bitcast to integer and shift the exponent to the LSB.
6770 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
6771 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
6772 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
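  // (52 and 23 are the mantissa widths of f64 and f32, so the shift leaves the
  // biased exponent in the low bits.)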
6773
6774 SDValue Exp;
6775  // Restore to the original type. Truncating after the SRL lets isel generate vnsrl.
6776 if (Op->isVPOpcode()) {
6777 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
6778 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
6779 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
6780 } else {
6781 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
6782 DAG.getConstant(ShiftAmt, DL, IntVT));
6783 if (IntVT.bitsLT(VT))
6784 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
6785 else if (IntVT.bitsGT(VT))
6786 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
6787 }
6788
6789 // The exponent contains log2 of the value in biased form.
6790 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
6791 // For trailing zeros, we just need to subtract the bias.
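  // For example, for an i32 element equal to 8: 8 & -8 == 8 == 2^3, which
  // converts to 8.0f with a biased exponent of 127 + 3 = 130, and
  // 130 - 127 = 3 trailing zeros.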
6792 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
6793 return DAG.getNode(ISD::SUB, DL, VT, Exp,
6794 DAG.getConstant(ExponentBias, DL, VT));
6795 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
6796 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
6797 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
6798
6799 // For leading zeros, we need to remove the bias and convert from log2 to
6800 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
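  // For example, an i32 element of 0x8000 converts to 32768.0f with a biased
  // exponent of 127 + 15 = 142, giving (127 + 31) - 142 = 16 leading zeros.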
6801 unsigned Adjust = ExponentBias + (EltSize - 1);
6802 SDValue Res;
6803 if (Op->isVPOpcode())
6804 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
6805 Mask, VL);
6806 else
6807 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
6808
6809  // With a zero input, the result above equals Adjust, which is greater than
6810  // EltSize. Hence, we can clamp with min(Res, EltSize) for CTLZ.
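  // (For i32 elements converted via f32, that zero-input value is
  // 127 + 31 = 158, which the UMIN below clamps to 32.)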
6811 if (Op.getOpcode() == ISD::CTLZ)
6812 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
6813 else if (Op.getOpcode() == ISD::VP_CTLZ)
6814 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
6815 DAG.getConstant(EltSize, DL, VT), Mask, VL);
6816 return Res;
6817}
6818
6819SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
6820 SelectionDAG &DAG) const {
6821 SDLoc DL(Op);
6822 MVT XLenVT = Subtarget.getXLenVT();
6823 SDValue Source = Op->getOperand(0);
6824 MVT SrcVT = Source.getSimpleValueType();
6825 SDValue Mask = Op->getOperand(1);
6826 SDValue EVL = Op->getOperand(2);
6827
6828 if (SrcVT.isFixedLengthVector()) {
6829 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
6830 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
6831 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6832 Subtarget);
6833 SrcVT = ContainerVT;
6834 }
6835
6836 // Convert to boolean vector.
6837 if (SrcVT.getScalarType() != MVT::i1) {
6838 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
6839 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
6840 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
6841 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
6842 DAG.getUNDEF(SrcVT), Mask, EVL});
6843 }
6844
6845 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
6846 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
6847    // In this case, we can interpret poison as -1, so there is nothing further to do.
6848 return Res;
6849
6850 // Convert -1 to VL.
6851 SDValue SetCC =
6852 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
6853 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
6854 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
6855}
6856
6857// While RVV has alignment restrictions, we should always be able to load as a
6858// legal equivalently-sized byte-typed vector instead. This method is
6859// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
6860// the load is already correctly-aligned, it returns SDValue().
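// For example, an underaligned load of <vscale x 2 x i32> can instead be done
// as a load of <vscale x 8 x i8> followed by a bitcast to the original type.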
6861SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
6862 SelectionDAG &DAG) const {
6863 auto *Load = cast<LoadSDNode>(Op);
6864 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6865
6866  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6867                                     Load->getMemoryVT(),
6868 *Load->getMemOperand()))
6869 return SDValue();
6870
6871 SDLoc DL(Op);
6872 MVT VT = Op.getSimpleValueType();
6873 unsigned EltSizeBits = VT.getScalarSizeInBits();
6874 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6875 "Unexpected unaligned RVV load type");
6876 MVT NewVT =
6877 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6878 assert(NewVT.isValid() &&
6879 "Expecting equally-sized RVV vector types to be legal");
6880 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6881 Load->getPointerInfo(), Load->getBaseAlign(),
6882 Load->getMemOperand()->getFlags());
6883 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6884}
6885
6886// While RVV has alignment restrictions, we should always be able to store as a
6887// legal equivalently-sized byte-typed vector instead. This method is
6888// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
6889// returns SDValue() if the store is already correctly aligned.
6890SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
6891 SelectionDAG &DAG) const {
6892 auto *Store = cast<StoreSDNode>(Op);
6893 assert(Store && Store->getValue().getValueType().isVector() &&
6894 "Expected vector store");
6895
6896  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6897                                     Store->getMemoryVT(),
6898 *Store->getMemOperand()))
6899 return SDValue();
6900
6901 SDLoc DL(Op);
6902 SDValue StoredVal = Store->getValue();
6903 MVT VT = StoredVal.getSimpleValueType();
6904 unsigned EltSizeBits = VT.getScalarSizeInBits();
6905 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6906 "Unexpected unaligned RVV store type");
6907 MVT NewVT =
6908 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6909 assert(NewVT.isValid() &&
6910 "Expecting equally-sized RVV vector types to be legal");
6911 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6912 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
6913 Store->getPointerInfo(), Store->getBaseAlign(),
6914 Store->getMemOperand()->getFlags());
6915}
6916
6917// While RVV has alignment restrictions, we should always be able to load as a
6918// legal equivalently-sized byte-typed vector instead. This method is
6919// responsible for re-expressing an ISD::VP_LOAD via a correctly-aligned type. If
6920// the load is already correctly-aligned, it returns SDValue().
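// Note that the explicit vector length must also be scaled by the element size
// in bytes, since the re-expressed load counts i8 elements.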
6921SDValue RISCVTargetLowering::expandUnalignedVPLoad(SDValue Op,
6922 SelectionDAG &DAG) const {
6923 auto *Load = cast<VPLoadSDNode>(Op);
6924 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
6925
6926  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6927                                     Load->getMemoryVT(),
6928 *Load->getMemOperand()))
6929 return SDValue();
6930
6931 SDValue Mask = Load->getMask();
6932
6933  // FIXME: Handle masked loads somehow.
6934  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
6935    return SDValue();
6936
6937 SDLoc DL(Op);
6938 MVT VT = Op.getSimpleValueType();
6939 unsigned EltSizeBits = VT.getScalarSizeInBits();
6940 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6941 "Unexpected unaligned RVV load type");
6942 MVT NewVT =
6943 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6944 assert(NewVT.isValid() &&
6945 "Expecting equally-sized RVV vector types to be legal");
6946
6947 SDValue VL = Load->getVectorLength();
6948 VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
6949 DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
6950
6951 MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
6952 SDValue L = DAG.getLoadVP(NewVT, DL, Load->getChain(), Load->getBasePtr(),
6953 DAG.getAllOnesConstant(DL, MaskVT), VL,
6954 Load->getPointerInfo(), Load->getBaseAlign(),
6955 Load->getMemOperand()->getFlags(), AAMDNodes());
6956 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
6957}
6958
6959// While RVV has alignment restrictions, we should always be able to store as a
6960// legal equivalently-sized byte-typed vector instead. This method is
6961// responsible for re-expressing an ISD::VP_STORE via a correctly-aligned type.
6962// It returns SDValue() if the store is already correctly aligned.
6963SDValue RISCVTargetLowering::expandUnalignedVPStore(SDValue Op,
6964 SelectionDAG &DAG) const {
6965 auto *Store = cast<VPStoreSDNode>(Op);
6966 assert(Store && Store->getValue().getValueType().isVector() &&
6967 "Expected vector store");
6968
6969  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
6970                                     Store->getMemoryVT(),
6971 *Store->getMemOperand()))
6972 return SDValue();
6973
6974 SDValue Mask = Store->getMask();
6975
6976  // FIXME: Handle masked stores somehow.
6977  if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
6978    return SDValue();
6979
6980 SDLoc DL(Op);
6981 SDValue StoredVal = Store->getValue();
6982 MVT VT = StoredVal.getSimpleValueType();
6983 unsigned EltSizeBits = VT.getScalarSizeInBits();
6984 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
6985 "Unexpected unaligned RVV store type");
6986 MVT NewVT =
6987 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
6988 assert(NewVT.isValid() &&
6989 "Expecting equally-sized RVV vector types to be legal");
6990
6991 SDValue VL = Store->getVectorLength();
6992 VL = DAG.getNode(ISD::MUL, DL, VL.getValueType(), VL,
6993 DAG.getConstant((EltSizeBits / 8), DL, VL.getValueType()));
6994
6995 StoredVal = DAG.getBitcast(NewVT, StoredVal);
6996
6997 LocationSize Size = LocationSize::precise(NewVT.getStoreSize());
6998 MachineFunction &MF = DAG.getMachineFunction();
6999 MachineMemOperand *MMO = MF.getMachineMemOperand(
7000 Store->getPointerInfo(), Store->getMemOperand()->getFlags(), Size,
7001 Store->getBaseAlign());
7002
7003 MVT MaskVT = MVT::getVectorVT(MVT::i1, NewVT.getVectorElementCount());
7004 return DAG.getStoreVP(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
7005 DAG.getUNDEF(Store->getBasePtr().getValueType()),
7006 DAG.getAllOnesConstant(DL, MaskVT), VL, NewVT, MMO,
7007                        ISD::UNINDEXED);
7008}
7009
7010static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
7011                             const RISCVSubtarget &Subtarget) {
7012 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
7013
7014 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
7015
7016 // All simm32 constants should be handled by isel.
7017 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
7018 // this check redundant, but small immediates are common so this check
7019 // should have better compile time.
7020 if (isInt<32>(Imm))
7021 return Op;
7022
7023 // We only need to cost the immediate, if constant pool lowering is enabled.
7024 if (!Subtarget.useConstantPoolForLargeInts())
7025 return Op;
7026
7027  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
7028  if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
7029 return Op;
7030
7031 // Optimizations below are disabled for opt size. If we're optimizing for
7032 // size, use a constant pool.
7033 if (DAG.shouldOptForSize())
7034 return SDValue();
7035
7036  // Special case. See if we can build the constant as (ADD (SLLI X, C), X);
7037  // do that if it will avoid a constant pool.
7038 // It will require an extra temporary register though.
7039 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
7040 // low and high 32 bits are the same and bit 31 and 63 are set.
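  // For example, to build 0xABCDABCDABCDABCD: materialize
  // X = 0xFFFFFFFFABCDABCD with lui+addiw, then ADD_UW of X with (SLLI X, 32)
  // combines the zero-extended low half with the shifted copy to form the
  // full 64-bit constant.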
7041 unsigned ShiftAmt, AddOpc;
7042 RISCVMatInt::InstSeq SeqLo =
7043 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
7044 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
7045 return Op;
7046
7047 return SDValue();
7048}
7049
7050SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
7051 SelectionDAG &DAG) const {
7052 MVT VT = Op.getSimpleValueType();
7053 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
7054
7055 // Can this constant be selected by a Zfa FLI instruction?
7056 bool Negate = false;
7057 int Index = getLegalZfaFPImm(Imm, VT);
7058
7059 // If the constant is negative, try negating.
7060 if (Index < 0 && Imm.isNegative()) {
7061 Index = getLegalZfaFPImm(-Imm, VT);
7062 Negate = true;
7063 }
7064
7065 // If we couldn't find a FLI lowering, fall back to generic code.
7066 if (Index < 0)
7067 return SDValue();
7068
7069 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
7070 SDLoc DL(Op);
7071 SDValue Const =
7072 DAG.getNode(RISCVISD::FLI, DL, VT,
7073 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
7074 if (!Negate)
7075 return Const;
7076
7077 return DAG.getNode(ISD::FNEG, DL, VT, Const);
7078}
7079
7080static SDValue LowerPREFETCH(SDValue Op, const RISCVSubtarget &Subtarget,
7081                             SelectionDAG &DAG) {
7082
7083 unsigned IsData = Op.getConstantOperandVal(4);
7084
7085  // mips-p8700 only supports data prefetches for now, so drop instruction prefetches.
7086 if (Subtarget.hasVendorXMIPSCBOP() && !IsData)
7087 return Op.getOperand(0);
7088 return Op;
7089}
7090
7091static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
7092                                 const RISCVSubtarget &Subtarget) {
7093 SDLoc dl(Op);
7094 AtomicOrdering FenceOrdering =
7095 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
7096 SyncScope::ID FenceSSID =
7097 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
7098
7099 if (Subtarget.hasStdExtZtso()) {
7100 // The only fence that needs an instruction is a sequentially-consistent
7101 // cross-thread fence.
7102 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
7103 FenceSSID == SyncScope::System)
7104 return Op;
7105
7106 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
7107 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
7108 }
7109
7110 // singlethread fences only synchronize with signal handlers on the same
7111 // thread and thus only need to preserve instruction order, not actually
7112 // enforce memory ordering.
7113 if (FenceSSID == SyncScope::SingleThread)
7114 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
7115 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
7116
7117 return Op;
7118}
7119
7120SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
7121 SelectionDAG &DAG) const {
7122 SDLoc DL(Op);
7123 MVT VT = Op.getSimpleValueType();
7124 MVT XLenVT = Subtarget.getXLenVT();
7125 unsigned Check = Op.getConstantOperandVal(1);
7126 unsigned TDCMask = 0;
7127 if (Check & fcSNan)
7128 TDCMask |= RISCV::FPMASK_Signaling_NaN;
7129 if (Check & fcQNan)
7130 TDCMask |= RISCV::FPMASK_Quiet_NaN;
7131  if (Check & fcPosInf)
7132    TDCMask |= RISCV::FPMASK_Positive_Infinity;
7133  if (Check & fcNegInf)
7134    TDCMask |= RISCV::FPMASK_Negative_Infinity;
7135  if (Check & fcPosNormal)
7136    TDCMask |= RISCV::FPMASK_Positive_Normal;
7137  if (Check & fcNegNormal)
7138    TDCMask |= RISCV::FPMASK_Negative_Normal;
7139  if (Check & fcPosSubnormal)
7140    TDCMask |= RISCV::FPMASK_Positive_Subnormal;
7141  if (Check & fcNegSubnormal)
7142    TDCMask |= RISCV::FPMASK_Negative_Subnormal;
7143  if (Check & fcPosZero)
7144 TDCMask |= RISCV::FPMASK_Positive_Zero;
7145 if (Check & fcNegZero)
7146 TDCMask |= RISCV::FPMASK_Negative_Zero;
7147
7148 bool IsOneBitMask = isPowerOf2_32(TDCMask);
7149
7150 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
7151
7152 if (VT.isVector()) {
7153 SDValue Op0 = Op.getOperand(0);
7154 MVT VT0 = Op.getOperand(0).getSimpleValueType();
7155
7156 if (VT.isScalableVector()) {
7157 MVT DstVT = VT0.changeVectorElementTypeToInteger();
7158 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
7159 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
7160 Mask = Op.getOperand(2);
7161 VL = Op.getOperand(3);
7162 }
7163 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
7164 VL, Op->getFlags());
7165 if (IsOneBitMask)
7166 return DAG.getSetCC(DL, VT, FPCLASS,
7167                            DAG.getConstant(TDCMask, DL, DstVT),
7168                            ISD::SETEQ);
7169 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
7170 DAG.getConstant(TDCMask, DL, DstVT));
7171 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
7172 ISD::SETNE);
7173 }
7174
7175 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
7176 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7177 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
7178 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
7179 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
7180 Mask = Op.getOperand(2);
7181 MVT MaskContainerVT =
7182 getContainerForFixedLengthVector(Mask.getSimpleValueType());
7183 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
7184 VL = Op.getOperand(3);
7185 }
7186 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
7187
7188 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
7189 Mask, VL, Op->getFlags());
7190
7191 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
7192 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
7193 if (IsOneBitMask) {
7194 SDValue VMSEQ =
7195 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
7196 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
7197 DAG.getUNDEF(ContainerVT), Mask, VL});
7198 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
7199 }
7200 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
7201 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
7202
7203 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
7204 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
7205 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
7206
7207 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
7208 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
7209 DAG.getUNDEF(ContainerVT), Mask, VL});
7210 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
7211 }
7212
7213 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
7214 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
7215 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
7216                             ISD::SETNE);
7217  return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7218}
7219
7220// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
7221// operations propagate nans.
7222static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
7223                                      const RISCVSubtarget &Subtarget) {
7224 SDLoc DL(Op);
7225 MVT VT = Op.getSimpleValueType();
7226
7227 SDValue X = Op.getOperand(0);
7228 SDValue Y = Op.getOperand(1);
7229
7230 if (!VT.isVector()) {
7231 MVT XLenVT = Subtarget.getXLenVT();
7232
7233 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
7234 // ensures that when one input is a nan, the other will also be a nan
7235 // allowing the nan to propagate. If both inputs are nan, this will swap the
7236 // inputs which is harmless.
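    // For example, fmaximum(NaN, 3.0) must return NaN, whereas fmax.s alone
    // would return 3.0; after the selects below both operands are NaN, so the
    // result is NaN as required.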
7237
7238 SDValue NewY = Y;
7239 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
7240 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
7241 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
7242 }
7243
7244 SDValue NewX = X;
7245 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
7246 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
7247 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
7248 }
7249
7250 unsigned Opc =
7251 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
7252 return DAG.getNode(Opc, DL, VT, NewX, NewY);
7253 }
7254
7255  // Check for NaN inputs before converting the fixed-length vectors to scalable ones.
7256 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
7257 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
7258
7259 MVT ContainerVT = VT;
7260 if (VT.isFixedLengthVector()) {
7261 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
7262 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
7263 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
7264 }
7265
7266 SDValue Mask, VL;
7267 if (Op->isVPOpcode()) {
7268 Mask = Op.getOperand(2);
7269 if (VT.isFixedLengthVector())
7270 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
7271 Subtarget);
7272 VL = Op.getOperand(3);
7273 } else {
7274 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7275 }
7276
7277 SDValue NewY = Y;
7278 if (!XIsNeverNan) {
7279 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
7280 {X, X, DAG.getCondCode(ISD::SETOEQ),
7281 DAG.getUNDEF(ContainerVT), Mask, VL});
7282 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
7283 DAG.getUNDEF(ContainerVT), VL);
7284 }
7285
7286 SDValue NewX = X;
7287 if (!YIsNeverNan) {
7288 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
7289 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
7290 DAG.getUNDEF(ContainerVT), Mask, VL});
7291 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
7292 DAG.getUNDEF(ContainerVT), VL);
7293 }
7294
7295 unsigned Opc =
7296 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
7297 ? RISCVISD::VFMAX_VL
7298 : RISCVISD::VFMIN_VL;
7299 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
7300 DAG.getUNDEF(ContainerVT), Mask, VL);
7301 if (VT.isFixedLengthVector())
7302 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
7303 return Res;
7304}
7305
7306static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
7307                               const RISCVSubtarget &Subtarget) {
7308 bool IsFABS = Op.getOpcode() == ISD::FABS;
7309 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
7310 "Wrong opcode for lowering FABS or FNEG.");
7311
7312 MVT XLenVT = Subtarget.getXLenVT();
7313 MVT VT = Op.getSimpleValueType();
7314 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7315
7316 SDLoc DL(Op);
7317 SDValue Fmv =
7318 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
7319
7320 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
7321 Mask = Mask.sext(Subtarget.getXLen());
7322
7323 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
7324 SDValue Logic =
7325 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
7326 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
7327}
7328
7329static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
7330                              const RISCVSubtarget &Subtarget) {
7331 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
7332
7333 MVT XLenVT = Subtarget.getXLenVT();
7334 MVT VT = Op.getSimpleValueType();
7335 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
7336
7337 SDValue Mag = Op.getOperand(0);
7338 SDValue Sign = Op.getOperand(1);
7339
7340 SDLoc DL(Op);
7341
7342 // Get sign bit into an integer value.
7343 unsigned SignSize = Sign.getValueSizeInBits();
7344 SDValue SignAsInt = [&]() {
7345 if (SignSize == Subtarget.getXLen())
7346 return DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
7347 switch (SignSize) {
7348 case 16:
7349 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
7350 case 32:
7351 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
7352 case 64: {
7353 assert(XLenVT == MVT::i32 && "Unexpected type");
7354 // Copy the upper word to integer.
7355 SignSize = 32;
7356 return DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
7357 .getValue(1);
7358 }
7359 default:
7360 llvm_unreachable("Unexpected sign size");
7361 }
7362 }();
7363
7364 // Get the signbit at the right position for MagAsInt.
7365 if (int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits())
7366 SignAsInt = DAG.getNode(ShiftAmount > 0 ? ISD::SRL : ISD::SHL, DL, XLenVT,
7367 SignAsInt,
7368 DAG.getConstant(std::abs(ShiftAmount), DL, XLenVT));
7369
7370 // Mask the sign bit and any bits above it. The extra bits will be dropped
7371 // when we convert back to FP.
7372 SDValue SignMask = DAG.getConstant(
7373 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
7374 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
7375
7376 // Transform Mag value to integer, and clear the sign bit.
7377 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
7378 SDValue ClearSignMask = DAG.getConstant(
7379 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
7380 SDValue ClearedSign =
7381 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
7382
7383  SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
7384                                   SDNodeFlags::Disjoint);
7385
7386 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
7387}
7388
7389/// Get a RISC-V target-specific VL op for a given SDNode.
7390static unsigned getRISCVVLOp(SDValue Op) {
7391#define OP_CASE(NODE) \
7392 case ISD::NODE: \
7393 return RISCVISD::NODE##_VL;
7394#define VP_CASE(NODE) \
7395 case ISD::VP_##NODE: \
7396 return RISCVISD::NODE##_VL;
7397 // clang-format off
7398 switch (Op.getOpcode()) {
7399 default:
7400 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
7401 OP_CASE(ADD)
7402 OP_CASE(SUB)
7403 OP_CASE(MUL)
7404 OP_CASE(MULHS)
7405 OP_CASE(MULHU)
7406 OP_CASE(SDIV)
7407 OP_CASE(SREM)
7408 OP_CASE(UDIV)
7409 OP_CASE(UREM)
7410 OP_CASE(SHL)
7411 OP_CASE(SRA)
7412 OP_CASE(SRL)
7413 OP_CASE(ROTL)
7414 OP_CASE(ROTR)
7415 OP_CASE(BSWAP)
7416 OP_CASE(CTTZ)
7417 OP_CASE(CTLZ)
7418 OP_CASE(CTPOP)
7419 OP_CASE(BITREVERSE)
7420 OP_CASE(SADDSAT)
7421 OP_CASE(UADDSAT)
7422 OP_CASE(SSUBSAT)
7423 OP_CASE(USUBSAT)
7424 OP_CASE(AVGFLOORS)
7425 OP_CASE(AVGFLOORU)
7426 OP_CASE(AVGCEILS)
7427 OP_CASE(AVGCEILU)
7428 OP_CASE(FADD)
7429 OP_CASE(FSUB)
7430 OP_CASE(FMUL)
7431 OP_CASE(FDIV)
7432 OP_CASE(FNEG)
7433 OP_CASE(FABS)
7434 OP_CASE(FCOPYSIGN)
7435 OP_CASE(FSQRT)
7436 OP_CASE(SMIN)
7437 OP_CASE(SMAX)
7438 OP_CASE(UMIN)
7439 OP_CASE(UMAX)
7440 OP_CASE(STRICT_FADD)
7441 OP_CASE(STRICT_FSUB)
7442 OP_CASE(STRICT_FMUL)
7443 OP_CASE(STRICT_FDIV)
7444 OP_CASE(STRICT_FSQRT)
7445 VP_CASE(ADD) // VP_ADD
7446 VP_CASE(SUB) // VP_SUB
7447 VP_CASE(MUL) // VP_MUL
7448 VP_CASE(SDIV) // VP_SDIV
7449 VP_CASE(SREM) // VP_SREM
7450 VP_CASE(UDIV) // VP_UDIV
7451 VP_CASE(UREM) // VP_UREM
7452 VP_CASE(SHL) // VP_SHL
7453 VP_CASE(FADD) // VP_FADD
7454 VP_CASE(FSUB) // VP_FSUB
7455 VP_CASE(FMUL) // VP_FMUL
7456 VP_CASE(FDIV) // VP_FDIV
7457 VP_CASE(FNEG) // VP_FNEG
7458 VP_CASE(FABS) // VP_FABS
7459 VP_CASE(SMIN) // VP_SMIN
7460 VP_CASE(SMAX) // VP_SMAX
7461 VP_CASE(UMIN) // VP_UMIN
7462 VP_CASE(UMAX) // VP_UMAX
7463 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
7464 VP_CASE(SETCC) // VP_SETCC
7465 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
7466 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
7467 VP_CASE(BITREVERSE) // VP_BITREVERSE
7468 VP_CASE(SADDSAT) // VP_SADDSAT
7469 VP_CASE(UADDSAT) // VP_UADDSAT
7470 VP_CASE(SSUBSAT) // VP_SSUBSAT
7471 VP_CASE(USUBSAT) // VP_USUBSAT
7472 VP_CASE(BSWAP) // VP_BSWAP
7473 VP_CASE(CTLZ) // VP_CTLZ
7474 VP_CASE(CTTZ) // VP_CTTZ
7475 VP_CASE(CTPOP) // VP_CTPOP
7476  case ISD::CTLZ_ZERO_UNDEF:
7477  case ISD::VP_CTLZ_ZERO_UNDEF:
7478 return RISCVISD::CTLZ_VL;
7479  case ISD::CTTZ_ZERO_UNDEF:
7480  case ISD::VP_CTTZ_ZERO_UNDEF:
7481 return RISCVISD::CTTZ_VL;
7482 case ISD::FMA:
7483 case ISD::VP_FMA:
7484 return RISCVISD::VFMADD_VL;
7485 case ISD::STRICT_FMA:
7486 return RISCVISD::STRICT_VFMADD_VL;
7487 case ISD::AND:
7488 case ISD::VP_AND:
7489 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7490 return RISCVISD::VMAND_VL;
7491 return RISCVISD::AND_VL;
7492 case ISD::OR:
7493 case ISD::VP_OR:
7494 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7495 return RISCVISD::VMOR_VL;
7496 return RISCVISD::OR_VL;
7497 case ISD::XOR:
7498 case ISD::VP_XOR:
7499 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7500 return RISCVISD::VMXOR_VL;
7501 return RISCVISD::XOR_VL;
7502 case ISD::ANY_EXTEND:
7503 case ISD::ZERO_EXTEND:
7504 return RISCVISD::VZEXT_VL;
7505 case ISD::SIGN_EXTEND:
7506 return RISCVISD::VSEXT_VL;
7507 case ISD::SETCC:
7508 return RISCVISD::SETCC_VL;
7509 case ISD::VSELECT:
7510 return RISCVISD::VMERGE_VL;
7511 case ISD::VP_SELECT:
7512 case ISD::VP_MERGE:
7513 return RISCVISD::VMERGE_VL;
7514 case ISD::VP_SRA:
7515 return RISCVISD::SRA_VL;
7516 case ISD::VP_SRL:
7517 return RISCVISD::SRL_VL;
7518 case ISD::VP_SQRT:
7519 return RISCVISD::FSQRT_VL;
7520 case ISD::VP_SIGN_EXTEND:
7521 return RISCVISD::VSEXT_VL;
7522 case ISD::VP_ZERO_EXTEND:
7523 return RISCVISD::VZEXT_VL;
7524 case ISD::VP_FP_TO_SINT:
7525 return RISCVISD::VFCVT_RTZ_X_F_VL;
7526 case ISD::VP_FP_TO_UINT:
7527 return RISCVISD::VFCVT_RTZ_XU_F_VL;
7528 case ISD::FMINNUM:
7529 case ISD::FMINIMUMNUM:
7530 case ISD::VP_FMINNUM:
7531 return RISCVISD::VFMIN_VL;
7532 case ISD::FMAXNUM:
7533 case ISD::FMAXIMUMNUM:
7534 case ISD::VP_FMAXNUM:
7535 return RISCVISD::VFMAX_VL;
7536 case ISD::LRINT:
7537 case ISD::VP_LRINT:
7538 case ISD::LLRINT:
7539 case ISD::VP_LLRINT:
7540 return RISCVISD::VFCVT_RM_X_F_VL;
7541 }
7542 // clang-format on
7543#undef OP_CASE
7544#undef VP_CASE
7545}
7546
7547static bool isPromotedOpNeedingSplit(SDValue Op,
7548                                     const RISCVSubtarget &Subtarget) {
7549 return (Op.getValueType() == MVT::nxv32f16 &&
7550 (Subtarget.hasVInstructionsF16Minimal() &&
7551 !Subtarget.hasVInstructionsF16())) ||
7552 (Op.getValueType() == MVT::nxv32bf16 &&
7553 Subtarget.hasVInstructionsBF16Minimal() &&
7554 (!Subtarget.hasVInstructionsBF16() ||
7555 (!llvm::is_contained(ZvfbfaOps, Op.getOpcode()) &&
7556 !llvm::is_contained(ZvfbfaVPOps, Op.getOpcode()))));
7557}
7558
7559static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
7560  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7561 SDLoc DL(Op);
7562
7563 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7564 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7565
7566 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7567 if (!Op.getOperand(j).getValueType().isVector()) {
7568 LoOperands[j] = Op.getOperand(j);
7569 HiOperands[j] = Op.getOperand(j);
7570 continue;
7571 }
7572 std::tie(LoOperands[j], HiOperands[j]) =
7573 DAG.SplitVector(Op.getOperand(j), DL);
7574 }
7575
7576 SDValue LoRes =
7577 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7578 SDValue HiRes =
7579 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7580
7581 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7582}
7583
7584static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
7585  assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
7586 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
7587 SDLoc DL(Op);
7588
7589 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7590 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7591
7592 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7593 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
7594 std::tie(LoOperands[j], HiOperands[j]) =
7595 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
7596 continue;
7597 }
7598 if (!Op.getOperand(j).getValueType().isVector()) {
7599 LoOperands[j] = Op.getOperand(j);
7600 HiOperands[j] = Op.getOperand(j);
7601 continue;
7602 }
7603 std::tie(LoOperands[j], HiOperands[j]) =
7604 DAG.SplitVector(Op.getOperand(j), DL);
7605 }
7606
7607 SDValue LoRes =
7608 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
7609 SDValue HiRes =
7610 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
7611
7612 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
7613}
7614
7615static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
7616  SDLoc DL(Op);
7617
7618 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
7619 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
7620 auto [EVLLo, EVLHi] =
7621 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
7622
7623 SDValue ResLo =
7624 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7625 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
7626 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7627 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
7628}
7629
7630static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
7631
7632  assert(Op->isStrictFPOpcode());
7633
7634 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
7635
7636 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
7637 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
7638
7639 SDLoc DL(Op);
7640
7641 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
7642 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
7643
7644 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
7645 if (!Op.getOperand(j).getValueType().isVector()) {
7646 LoOperands[j] = Op.getOperand(j);
7647 HiOperands[j] = Op.getOperand(j);
7648 continue;
7649 }
7650 std::tie(LoOperands[j], HiOperands[j]) =
7651 DAG.SplitVector(Op.getOperand(j), DL);
7652 }
7653
7654 SDValue LoRes =
7655 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
7656 HiOperands[0] = LoRes.getValue(1);
7657 SDValue HiRes =
7658 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
7659
7660 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
7661 LoRes.getValue(0), HiRes.getValue(0));
7662 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
7663}
7664
7665SDValue
7666RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7667 SelectionDAG &DAG) const {
7668 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7669 "Unexpected bfloat16 load lowering");
7670
7671 SDLoc DL(Op);
7672 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7673 EVT MemVT = LD->getMemoryVT();
7674 SDValue Load = DAG.getExtLoad(
7675 ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7676 LD->getBasePtr(),
7677      EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
7678      LD->getMemOperand());
7679  // Use a mask to make the bf16 NaN-boxing valid when we don't have the flh
7680  // instruction. -65536 is treated as a small (simm32) value, so it can be
7681  // materialized directly with a single lui.
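  // (NaN-boxing requires all bits above bit 15 of the wider FP register to be
  // ones for the narrow value to be treated as valid.)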
7682 SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7683 SDValue OrSixteenOne =
7684 DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7685 SDValue ConvertedResult =
7686 DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7687 return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7688}
7689
7690SDValue
7691RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7692 SelectionDAG &DAG) const {
7693 assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7694 "Unexpected bfloat16 store lowering");
7695
7696 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7697 SDLoc DL(Op);
7698 SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7699 Subtarget.getXLenVT(), ST->getValue());
7700 return DAG.getTruncStore(
7701 ST->getChain(), DL, FMV, ST->getBasePtr(),
7702 EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7703 ST->getMemOperand());
7704}
7705
7706SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
7707                                            SelectionDAG &DAG) const {
7708 switch (Op.getOpcode()) {
7709 default:
7710    reportFatalInternalError(
7711        "Unimplemented RISCVTargetLowering::LowerOperation Case");
7712 case ISD::PREFETCH:
7713 return LowerPREFETCH(Op, Subtarget, DAG);
7714 case ISD::ATOMIC_FENCE:
7715 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7716 case ISD::GlobalAddress:
7717 return lowerGlobalAddress(Op, DAG);
7718 case ISD::BlockAddress:
7719 return lowerBlockAddress(Op, DAG);
7720 case ISD::ConstantPool:
7721 return lowerConstantPool(Op, DAG);
7722 case ISD::JumpTable:
7723 return lowerJumpTable(Op, DAG);
7724  case ISD::GlobalTLSAddress:
7725    return lowerGlobalTLSAddress(Op, DAG);
7726 case ISD::Constant:
7727 return lowerConstant(Op, DAG, Subtarget);
7728 case ISD::ConstantFP:
7729 return lowerConstantFP(Op, DAG);
7730 case ISD::SELECT:
7731 return lowerSELECT(Op, DAG);
7732 case ISD::BRCOND:
7733 return lowerBRCOND(Op, DAG);
7734 case ISD::VASTART:
7735 return lowerVASTART(Op, DAG);
7736 case ISD::FRAMEADDR:
7737 return lowerFRAMEADDR(Op, DAG);
7738 case ISD::RETURNADDR:
7739 return lowerRETURNADDR(Op, DAG);
7740 case ISD::SHL_PARTS:
7741 return lowerShiftLeftParts(Op, DAG);
7742 case ISD::SRA_PARTS:
7743 return lowerShiftRightParts(Op, DAG, true);
7744 case ISD::SRL_PARTS:
7745 return lowerShiftRightParts(Op, DAG, false);
7746 case ISD::ROTL:
7747 case ISD::ROTR:
7748 if (Op.getValueType().isFixedLengthVector()) {
7749 assert(Subtarget.hasStdExtZvkb());
7750 return lowerToScalableOp(Op, DAG);
7751 }
7752 assert(Subtarget.hasVendorXTHeadBb() &&
7753 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
7754 "Unexpected custom legalization");
7755 // XTHeadBb only supports rotate by constant.
7756 if (!isa<ConstantSDNode>(Op.getOperand(1)))
7757 return SDValue();
7758 return Op;
7759 case ISD::BITCAST: {
7760 SDLoc DL(Op);
7761 EVT VT = Op.getValueType();
7762 SDValue Op0 = Op.getOperand(0);
7763 EVT Op0VT = Op0.getValueType();
7764 MVT XLenVT = Subtarget.getXLenVT();
7765 if (Op0VT == MVT::i16 &&
7766 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
7767 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
7768 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
7769 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
7770 }
7771 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
7772 Subtarget.hasStdExtFOrZfinx()) {
7773 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
7774 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
7775 }
7776 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
7777 Subtarget.hasStdExtDOrZdinx()) {
7778 SDValue Lo, Hi;
7779 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
7780 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
7781 }
7782
7783 if (Subtarget.enablePExtCodeGen()) {
7784 bool Is32BitCast =
7785 (VT == MVT::i32 && (Op0VT == MVT::v4i8 || Op0VT == MVT::v2i16)) ||
7786 (Op0VT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
7787 bool Is64BitCast =
7788 (VT == MVT::i64 && (Op0VT == MVT::v8i8 || Op0VT == MVT::v4i16 ||
7789 Op0VT == MVT::v2i32)) ||
7790 (Op0VT == MVT::i64 &&
7791 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
7792 if (Is32BitCast || Is64BitCast)
7793 return Op;
7794 }
7795
7796 // Consider other scalar<->scalar casts as legal if the types are legal.
7797 // Otherwise expand them.
7798 if (!VT.isVector() && !Op0VT.isVector()) {
7799 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
7800 return Op;
7801 return SDValue();
7802 }
7803
7804 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
7805 "Unexpected types");
7806
7807 if (VT.isFixedLengthVector()) {
7808 // We can handle fixed length vector bitcasts with a simple replacement
7809 // in isel.
7810 if (Op0VT.isFixedLengthVector())
7811 return Op;
7812 // When bitcasting from scalar to fixed-length vector, insert the scalar
7813 // into a one-element vector of the result type, and perform a vector
7814 // bitcast.
7815 if (!Op0VT.isVector()) {
7816 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
7817 if (!isTypeLegal(BVT))
7818 return SDValue();
7819 return DAG.getBitcast(
7820 VT, DAG.getInsertVectorElt(DL, DAG.getUNDEF(BVT), Op0, 0));
7821 }
7822 return SDValue();
7823 }
7824 // Custom-legalize bitcasts from fixed-length vector types to scalar types
7825 // thus: bitcast the vector to a one-element vector type whose element type
7826 // is the same as the result type, and extract the first element.
7827 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
7828 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
7829 if (!isTypeLegal(BVT))
7830 return SDValue();
7831 SDValue BVec = DAG.getBitcast(BVT, Op0);
7832 return DAG.getExtractVectorElt(DL, VT, BVec, 0);
7833 }
7834 return SDValue();
7835 }
7836  case ISD::INTRINSIC_WO_CHAIN:
7837    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7838  case ISD::INTRINSIC_W_CHAIN:
7839    return LowerINTRINSIC_W_CHAIN(Op, DAG);
7840  case ISD::INTRINSIC_VOID:
7841    return LowerINTRINSIC_VOID(Op, DAG);
7842 case ISD::IS_FPCLASS:
7843 return LowerIS_FPCLASS(Op, DAG);
7844 case ISD::BITREVERSE: {
7845 MVT VT = Op.getSimpleValueType();
7846 if (VT.isFixedLengthVector()) {
7847 assert(Subtarget.hasStdExtZvbb());
7848 return lowerToScalableOp(Op, DAG);
7849 }
7850 SDLoc DL(Op);
7851 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
7852 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
7853 // Expand bitreverse to a bswap(rev8) followed by brev8.
7854 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
7855 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
7856 }
7857 case ISD::TRUNCATE:
7858  case ISD::TRUNCATE_SSAT_S:
7859  case ISD::TRUNCATE_USAT_U:
7860    // Only custom-lower vector truncates
7861 if (!Op.getSimpleValueType().isVector())
7862 return Op;
7863 return lowerVectorTruncLike(Op, DAG);
7864 case ISD::ANY_EXTEND:
7865 case ISD::ZERO_EXTEND:
7866 if (Op.getOperand(0).getValueType().isVector() &&
7867 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7868 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
7869 if (Op.getValueType().isScalableVector())
7870 return Op;
7871 return lowerToScalableOp(Op, DAG);
7872 case ISD::SIGN_EXTEND:
7873 if (Op.getOperand(0).getValueType().isVector() &&
7874 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7875 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
7876 if (Op.getValueType().isScalableVector())
7877 return Op;
7878 return lowerToScalableOp(Op, DAG);
7879  case ISD::SPLAT_VECTOR_PARTS:
7880    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
7881  case ISD::INSERT_VECTOR_ELT:
7882    return lowerINSERT_VECTOR_ELT(Op, DAG);
7883  case ISD::EXTRACT_VECTOR_ELT:
7884    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7885 case ISD::SCALAR_TO_VECTOR: {
7886 MVT VT = Op.getSimpleValueType();
7887 SDLoc DL(Op);
7888 SDValue Scalar = Op.getOperand(0);
7889 if (VT.getVectorElementType() == MVT::i1) {
7890 MVT WideVT = VT.changeVectorElementType(MVT::i8);
7891 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
7892 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
7893 }
7894 MVT ContainerVT = VT;
7895 if (VT.isFixedLengthVector())
7896 ContainerVT = getContainerForFixedLengthVector(VT);
7897 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
7898
7899 SDValue V;
7900 if (VT.isFloatingPoint()) {
7901 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
7902 DAG.getUNDEF(ContainerVT), Scalar, VL);
7903 } else {
7904 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
7905 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
7906 DAG.getUNDEF(ContainerVT), Scalar, VL);
7907 }
7908 if (VT.isFixedLengthVector())
7909 V = convertFromScalableVector(VT, V, DAG, Subtarget);
7910 return V;
7911 }
7912 case ISD::VSCALE: {
7913 MVT XLenVT = Subtarget.getXLenVT();
7914 MVT VT = Op.getSimpleValueType();
7915 SDLoc DL(Op);
7916 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7917 // We define our scalable vector types for lmul=1 to use a 64 bit known
7918 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
7919 // vscale as VLENB / 8.
7920 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
7921 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
7922 reportFatalInternalError("Support for VLEN==32 is incomplete.");
7923 // We assume VLENB is a multiple of 8. We manually choose the best shift
7924 // here because SimplifyDemandedBits isn't always able to simplify it.
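    // For example, a constant multiplier of 16 is lowered to VLENB << 1, and a
    // multiplier of 24 to VLENB * 3.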
7925 uint64_t Val = Op.getConstantOperandVal(0);
7926 if (isPowerOf2_64(Val)) {
7927 uint64_t Log2 = Log2_64(Val);
7928 if (Log2 < 3) {
7929 SDNodeFlags Flags;
7930 Flags.setExact(true);
7931 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7932 DAG.getConstant(3 - Log2, DL, XLenVT), Flags);
7933 } else if (Log2 > 3) {
7934 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
7935 DAG.getConstant(Log2 - 3, DL, XLenVT));
7936 }
7937 } else if ((Val % 8) == 0) {
7938 // If the multiplier is a multiple of 8, scale it down to avoid needing
7939 // to shift the VLENB value.
7940 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
7941 DAG.getConstant(Val / 8, DL, XLenVT));
7942 } else {
7943 SDNodeFlags Flags;
7944 Flags.setExact(true);
7945 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
7946 DAG.getConstant(3, DL, XLenVT), Flags);
7947 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
7948 DAG.getConstant(Val, DL, XLenVT));
7949 }
7950 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
7951 }
7952 case ISD::FPOWI: {
7953 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
7954 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
7955 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
7956 Op.getOperand(1).getValueType() == MVT::i32) {
7957 SDLoc DL(Op);
7958 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7959 SDValue Powi =
7960 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
7961 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
7962 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7963 }
7964 return SDValue();
7965 }
7966 case ISD::FMAXIMUM:
7967 case ISD::FMINIMUM:
7968 if (isPromotedOpNeedingSplit(Op, Subtarget))
7969 return SplitVectorOp(Op, DAG);
7970 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7971 case ISD::FP_EXTEND:
7972 case ISD::FP_ROUND:
7973 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7974  case ISD::STRICT_FP_ROUND:
7975  case ISD::STRICT_FP_EXTEND:
7976    return lowerStrictFPExtendOrRoundLike(Op, DAG);
7977 case ISD::SINT_TO_FP:
7978 case ISD::UINT_TO_FP:
7979 if (Op.getValueType().isVector() &&
7980 ((Op.getValueType().getScalarType() == MVT::f16 &&
7981 (Subtarget.hasVInstructionsF16Minimal() &&
7982 !Subtarget.hasVInstructionsF16())) ||
7983 Op.getValueType().getScalarType() == MVT::bf16)) {
7984 if (isPromotedOpNeedingSplit(Op, Subtarget))
7985 return SplitVectorOp(Op, DAG);
7986 // int -> f32
7987 SDLoc DL(Op);
7988 MVT NVT =
7989 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7990 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7991 // f32 -> [b]f16
7992 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7993 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7994 }
7995 [[fallthrough]];
7996 case ISD::FP_TO_SINT:
7997 case ISD::FP_TO_UINT:
7998 if (SDValue Op1 = Op.getOperand(0);
7999 Op1.getValueType().isVector() &&
8000 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8001 (Subtarget.hasVInstructionsF16Minimal() &&
8002 !Subtarget.hasVInstructionsF16())) ||
8003 Op1.getValueType().getScalarType() == MVT::bf16)) {
8004 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8005 return SplitVectorOp(Op, DAG);
8006 // [b]f16 -> f32
8007 SDLoc DL(Op);
8008 MVT NVT = MVT::getVectorVT(MVT::f32,
8009 Op1.getValueType().getVectorElementCount());
8010 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8011 // f32 -> int
8012 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
8013 }
8014 [[fallthrough]];
8015  case ISD::STRICT_FP_TO_SINT:
8016  case ISD::STRICT_FP_TO_UINT:
8017  case ISD::STRICT_SINT_TO_FP:
8018  case ISD::STRICT_UINT_TO_FP: {
8019    // RVV can only do fp<->int conversions to types half/double the size of
8020 // the source. We custom-lower any conversions that do two hops into
8021 // sequences.
8022 MVT VT = Op.getSimpleValueType();
8023 if (VT.isScalarInteger())
8024 return lowerFP_TO_INT(Op, DAG, Subtarget);
8025 bool IsStrict = Op->isStrictFPOpcode();
8026 SDValue Src = Op.getOperand(0 + IsStrict);
8027 MVT SrcVT = Src.getSimpleValueType();
8028 if (SrcVT.isScalarInteger())
8029 return lowerINT_TO_FP(Op, DAG, Subtarget);
8030 if (!VT.isVector())
8031 return Op;
8032 SDLoc DL(Op);
8033 MVT EltVT = VT.getVectorElementType();
8034 MVT SrcEltVT = SrcVT.getVectorElementType();
8035 unsigned EltSize = EltVT.getSizeInBits();
8036 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
8037 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
8038 "Unexpected vector element types");
8039
8040 bool IsInt2FP = SrcEltVT.isInteger();
8041 // Widening conversions
8042 if (EltSize > (2 * SrcEltSize)) {
8043 if (IsInt2FP) {
8044 // Do a regular integer sign/zero extension then convert to float.
8045        MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
8046                                      VT.getVectorElementCount());
8047 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
8048                              Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
8049                                 ? ISD::ZERO_EXTEND
8050                                 : ISD::SIGN_EXTEND;
8051 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
8052 if (IsStrict)
8053 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
8054 Op.getOperand(0), Ext);
8055 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
8056 }
8057 // FP2Int
8058 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
8059 // Do one doubling fp_extend then complete the operation by converting
8060 // to int.
8061 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
8062 if (IsStrict) {
8063 auto [FExt, Chain] =
8064 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
8065 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
8066 }
8067 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
8068 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
8069 }
8070
8071 // Narrowing conversions
8072 if (SrcEltSize > (2 * EltSize)) {
8073 if (IsInt2FP) {
8074 // One narrowing int_to_fp, then an fp_round.
8075 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
8076 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
8077 if (IsStrict) {
8078 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
8079 DAG.getVTList(InterimFVT, MVT::Other),
8080 Op.getOperand(0), Src);
8081 SDValue Chain = Int2FP.getValue(1);
8082 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
8083 }
8084 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
8085 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
8086 }
8087 // FP2Int
8088 // One narrowing fp_to_int, then truncate the integer. If the float isn't
8089 // representable by the integer, the result is poison.
8090      MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
8091                                    VT.getVectorElementCount());
8092 if (IsStrict) {
8093 SDValue FP2Int =
8094 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
8095 Op.getOperand(0), Src);
8096 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
8097 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
8098 }
8099 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
8100 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
8101 }
8102
8103 // Scalable vectors can exit here. Patterns will handle equally-sized
8104 // conversions halving/doubling ones.
8105 if (!VT.isFixedLengthVector())
8106 return Op;
8107
8108 // For fixed-length vectors we lower to a custom "VL" node.
8109 unsigned RVVOpc = 0;
8110 switch (Op.getOpcode()) {
8111 default:
8112 llvm_unreachable("Impossible opcode");
8113 case ISD::FP_TO_SINT:
8114 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
8115 break;
8116 case ISD::FP_TO_UINT:
8117 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
8118 break;
8119 case ISD::SINT_TO_FP:
8120 RVVOpc = RISCVISD::SINT_TO_FP_VL;
8121 break;
8122 case ISD::UINT_TO_FP:
8123 RVVOpc = RISCVISD::UINT_TO_FP_VL;
8124 break;
8125    case ISD::STRICT_FP_TO_SINT:
8126      RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
8127 break;
8128    case ISD::STRICT_FP_TO_UINT:
8129      RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
8130 break;
8131    case ISD::STRICT_SINT_TO_FP:
8132      RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
8133 break;
8134    case ISD::STRICT_UINT_TO_FP:
8135      RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
8136 break;
8137 }
8138
8139 MVT ContainerVT = getContainerForFixedLengthVector(VT);
8140 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8141 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
8142 "Expected same element count");
8143
8144 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8145
8146 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8147 if (IsStrict) {
8148 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8149 Op.getOperand(0), Src, Mask, VL);
8150 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
8151 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
8152 }
8153 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
8154 return convertFromScalableVector(VT, Src, DAG, Subtarget);
8155 }
8156  case ISD::FP_TO_SINT_SAT:
8157  case ISD::FP_TO_UINT_SAT:
8158    return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
8159 case ISD::FP_TO_BF16: {
8160 // Custom lower to ensure the libcall return is passed in an FPR on hard
8161 // float ABIs.
8162 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
8163 SDLoc DL(Op);
8164 MakeLibCallOptions CallOptions;
8165 RTLIB::Libcall LC =
8166 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
8167 SDValue Res =
8168 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
8169 if (Subtarget.is64Bit())
8170 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
8171 return DAG.getBitcast(MVT::i32, Res);
8172 }
8173 case ISD::BF16_TO_FP: {
8174 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
8175 MVT VT = Op.getSimpleValueType();
8176 SDLoc DL(Op);
8177 Op = DAG.getNode(
8178 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
8179 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
8180 SDValue Res = Subtarget.is64Bit()
8181 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
8182 : DAG.getBitcast(MVT::f32, Op);
8183 // fp_extend if the target VT is bigger than f32.
8184 if (VT != MVT::f32)
8185 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
8186 return Res;
8187 }
8188  case ISD::STRICT_FP_TO_FP16:
8189  case ISD::FP_TO_FP16: {
8190 // Custom lower to ensure the libcall return is passed in an FPR on hard
8191 // float ABIs.
8192 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
8193 SDLoc DL(Op);
8194 MakeLibCallOptions CallOptions;
8195 bool IsStrict = Op->isStrictFPOpcode();
8196 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
8197 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8198 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
8199 SDValue Res;
8200 std::tie(Res, Chain) =
8201 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
8202 if (Subtarget.is64Bit())
8203 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
8204 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
8205 if (IsStrict)
8206 return DAG.getMergeValues({Result, Chain}, DL);
8207 return Result;
8208 }
8209  case ISD::STRICT_FP16_TO_FP:
8210  case ISD::FP16_TO_FP: {
8211 // Custom lower to ensure the libcall argument is passed in an FPR on hard
8212 // float ABIs.
8213 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
8214 SDLoc DL(Op);
8215 MakeLibCallOptions CallOptions;
8216 bool IsStrict = Op->isStrictFPOpcode();
8217 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
8218 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8219 SDValue Arg = Subtarget.is64Bit()
8220 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
8221 : DAG.getBitcast(MVT::f32, Op0);
8222 SDValue Res;
8223 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
8224 CallOptions, DL, Chain);
8225 if (IsStrict)
8226 return DAG.getMergeValues({Res, Chain}, DL);
8227 return Res;
8228 }
8229 case ISD::FTRUNC:
8230 case ISD::FCEIL:
8231 case ISD::FFLOOR:
8232 case ISD::FNEARBYINT:
8233 case ISD::FRINT:
8234 case ISD::FROUND:
8235 case ISD::FROUNDEVEN:
8236 if (isPromotedOpNeedingSplit(Op, Subtarget))
8237 return SplitVectorOp(Op, DAG);
8238 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8239 case ISD::LRINT:
8240 case ISD::LLRINT:
8241 case ISD::LROUND:
8242 case ISD::LLROUND: {
8243 if (Op.getValueType().isVector())
8244 return lowerVectorXRINT_XROUND(Op, DAG, Subtarget);
8245 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
8246 "Unexpected custom legalisation");
8247 SDLoc DL(Op);
8248 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
8249 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
8250 }
8251 case ISD::STRICT_LRINT:
8252 case ISD::STRICT_LLRINT:
8253 case ISD::STRICT_LROUND:
8254 case ISD::STRICT_LLROUND: {
8255 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
8256 "Unexpected custom legalisation");
8257 SDLoc DL(Op);
8258 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
8259 {Op.getOperand(0), Op.getOperand(1)});
8260 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
8261 {Ext.getValue(1), Ext.getValue(0)});
8262 }
8263 case ISD::VECREDUCE_ADD:
8264 case ISD::VECREDUCE_UMAX:
8265 case ISD::VECREDUCE_SMAX:
8266 case ISD::VECREDUCE_UMIN:
8267 case ISD::VECREDUCE_SMIN:
8268 return lowerVECREDUCE(Op, DAG);
8269 case ISD::VECREDUCE_AND:
8270 case ISD::VECREDUCE_OR:
8271 case ISD::VECREDUCE_XOR:
8272 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
8273 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
8274 return lowerVECREDUCE(Op, DAG);
8281 return lowerFPVECREDUCE(Op, DAG);
8282 case ISD::VP_REDUCE_ADD:
8283 case ISD::VP_REDUCE_UMAX:
8284 case ISD::VP_REDUCE_SMAX:
8285 case ISD::VP_REDUCE_UMIN:
8286 case ISD::VP_REDUCE_SMIN:
8287 case ISD::VP_REDUCE_FADD:
8288 case ISD::VP_REDUCE_SEQ_FADD:
8289 case ISD::VP_REDUCE_FMIN:
8290 case ISD::VP_REDUCE_FMAX:
8291 case ISD::VP_REDUCE_FMINIMUM:
8292 case ISD::VP_REDUCE_FMAXIMUM:
8293 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
8294 return SplitVectorReductionOp(Op, DAG);
8295 return lowerVPREDUCE(Op, DAG);
8296 case ISD::VP_REDUCE_AND:
8297 case ISD::VP_REDUCE_OR:
8298 case ISD::VP_REDUCE_XOR:
8299 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
8300 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
8301 return lowerVPREDUCE(Op, DAG);
8302 case ISD::VP_CTTZ_ELTS:
8303 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
8304 return lowerVPCttzElements(Op, DAG);
8305 case ISD::UNDEF: {
8306 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
8307 return convertFromScalableVector(Op.getSimpleValueType(),
8308 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
8309 }
8310 case ISD::INSERT_SUBVECTOR:
8311 return lowerINSERT_SUBVECTOR(Op, DAG);
8312 case ISD::EXTRACT_SUBVECTOR:
8313 return lowerEXTRACT_SUBVECTOR(Op, DAG);
8314 case ISD::VECTOR_DEINTERLEAVE:
8315 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
8316 case ISD::VECTOR_INTERLEAVE:
8317 return lowerVECTOR_INTERLEAVE(Op, DAG);
8318 case ISD::STEP_VECTOR:
8319 return lowerSTEP_VECTOR(Op, DAG);
8320 case ISD::VECTOR_REVERSE:
8321 return lowerVECTOR_REVERSE(Op, DAG);
8322 case ISD::VECTOR_SPLICE:
8323 return lowerVECTOR_SPLICE(Op, DAG);
8324 case ISD::BUILD_VECTOR: {
8325 MVT VT = Op.getSimpleValueType();
8326 MVT EltVT = VT.getVectorElementType();
8327 if (!Subtarget.is64Bit() && EltVT == MVT::i64)
8328 return lowerBuildVectorViaVID(Op, DAG, Subtarget);
8329 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
8330 }
8331 case ISD::SPLAT_VECTOR: {
8332 MVT VT = Op.getSimpleValueType();
8333 MVT EltVT = VT.getVectorElementType();
8334 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
8335 EltVT == MVT::bf16) {
8336 SDLoc DL(Op);
8337 SDValue Elt;
8338 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
8339 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
8340 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
8341 Op.getOperand(0));
8342 else
8343 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
8344 MVT IVT = VT.changeVectorElementType(MVT::i16);
8345 return DAG.getNode(ISD::BITCAST, DL, VT,
8346 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
8347 }
8348
8349 if (EltVT == MVT::i1)
8350 return lowerVectorMaskSplat(Op, DAG);
8351 return SDValue();
8352 }
8353 case ISD::VECTOR_SHUFFLE:
8354 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
8355 case ISD::CONCAT_VECTORS: {
8356 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
8357 // better than going through the stack, as the default expansion does.
8358 SDLoc DL(Op);
8359 MVT VT = Op.getSimpleValueType();
8360 MVT ContainerVT = VT;
8361 if (VT.isFixedLengthVector())
8362 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
8363
8364 // Recursively split concat_vectors with more than 2 operands:
8365 //
8366 // concat_vector op1, op2, op3, op4
8367 // ->
8368 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
8369 //
8370 // This reduces the length of the chain of vslideups and allows us to
8371 // perform the vslideups at a smaller LMUL, limited to MF2.
8372 if (Op.getNumOperands() > 2 &&
8373 ContainerVT.bitsGE(RISCVTargetLowering::getM1VT(ContainerVT))) {
8374 MVT HalfVT = VT.getHalfNumVectorElementsVT();
8375 assert(isPowerOf2_32(Op.getNumOperands()));
8376 size_t HalfNumOps = Op.getNumOperands() / 2;
8377 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8378 Op->ops().take_front(HalfNumOps));
8379 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
8380 Op->ops().drop_front(HalfNumOps));
8381 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8382 }
8383
8384 unsigned NumOpElts =
8385 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
8386 SDValue Vec = DAG.getUNDEF(VT);
8387 for (const auto &OpIdx : enumerate(Op->ops())) {
8388 SDValue SubVec = OpIdx.value();
8389 // Don't insert undef subvectors.
8390 if (SubVec.isUndef())
8391 continue;
8392 Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
8393 }
8394 return Vec;
8395 }
8396 case ISD::LOAD: {
8397 auto *Load = cast<LoadSDNode>(Op);
8398 EVT VT = Load->getValueType(0);
8399 if (VT == MVT::f64) {
8400 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8401 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8402
8403 // Replace a double precision load with two i32 loads and a BuildPairF64.
8404 SDLoc DL(Op);
8405 SDValue BasePtr = Load->getBasePtr();
8406 SDValue Chain = Load->getChain();
8407
8408 SDValue Lo =
8409 DAG.getLoad(MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo(),
8410 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8411 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8412 SDValue Hi = DAG.getLoad(
8413 MVT::i32, DL, Chain, BasePtr, Load->getPointerInfo().getWithOffset(4),
8414 Load->getBaseAlign(), Load->getMemOperand()->getFlags());
8415 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8416 Hi.getValue(1));
8417
8418 SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
8419 return DAG.getMergeValues({Pair, Chain}, DL);
8420 }
8421
8422 if (VT == MVT::bf16)
8423 return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
8424
8425 // Handle normal vector tuple load.
8426 if (VT.isRISCVVectorTuple()) {
8427 SDLoc DL(Op);
8428 MVT XLenVT = Subtarget.getXLenVT();
8429 unsigned NF = VT.getRISCVVectorTupleNumFields();
8430 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8431 unsigned NumElts = Sz / (NF * 8);
8432 int Log2LMUL = Log2_64(NumElts) - 3;
8433
8434 auto Flag = SDNodeFlags();
8435 Flag.setNoUnsignedWrap(true);
8436 SDValue Ret = DAG.getUNDEF(VT);
8437 SDValue BasePtr = Load->getBasePtr();
8438 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8439 VROffset =
8440 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8441 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8442 SmallVector<SDValue, 8> OutChains;
8443
8444 // Load NF vector registers and combine them to a vector tuple.
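// For example, for a tuple of NF = 2 LMUL = 1 registers this emits two
// whole-register-sized i8 loads whose addresses differ by VLENB bytes, each
// result being folded into the tuple value via TUPLE_INSERT.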
8445 for (unsigned i = 0; i < NF; ++i) {
8446 SDValue LoadVal = DAG.getLoad(
8447 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
8448 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
8449 OutChains.push_back(LoadVal.getValue(1));
8450 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Ret, LoadVal,
8451 DAG.getTargetConstant(i, DL, MVT::i32));
8452 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8453 }
8454 return DAG.getMergeValues(
8455 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
8456 }
8457
8458 if (auto V = expandUnalignedRVVLoad(Op, DAG))
8459 return V;
8460 if (Op.getValueType().isFixedLengthVector())
8461 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
8462 return Op;
8463 }
8464 case ISD::STORE: {
8465 auto *Store = cast<StoreSDNode>(Op);
8466 SDValue StoredVal = Store->getValue();
8467 EVT VT = StoredVal.getValueType();
8468 if (Subtarget.enablePExtCodeGen()) {
8469 if (VT == MVT::v2i16 || VT == MVT::v4i8) {
8470 SDLoc DL(Op);
8471 SDValue Cast = DAG.getBitcast(MVT::i32, StoredVal);
8472 SDValue NewStore =
8473 DAG.getStore(Store->getChain(), DL, Cast, Store->getBasePtr(),
8474 Store->getPointerInfo(), Store->getBaseAlign(),
8475 Store->getMemOperand()->getFlags());
8476 return NewStore;
8477 }
8478 }
8479 if (VT == MVT::f64) {
8480 assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
8481 !Subtarget.is64Bit() && "Unexpected custom legalisation");
8482
8483 // Replace a double precision store with a SplitF64 and i32 stores.
8484 SDLoc DL(Op);
8485 SDValue BasePtr = Store->getBasePtr();
8486 SDValue Chain = Store->getChain();
8487 SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
8488 DAG.getVTList(MVT::i32, MVT::i32), StoredVal);
8489
8490 SDValue Lo = DAG.getStore(Chain, DL, Split.getValue(0), BasePtr,
8491 Store->getPointerInfo(), Store->getBaseAlign(),
8492 Store->getMemOperand()->getFlags());
8493 BasePtr = DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(4));
8494 SDValue Hi = DAG.getStore(Chain, DL, Split.getValue(1), BasePtr,
8495 Store->getPointerInfo().getWithOffset(4),
8496 Store->getBaseAlign(),
8497 Store->getMemOperand()->getFlags());
8498 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8499 }
8500 if (VT == MVT::i64) {
8501 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
8502 "Unexpected custom legalisation");
8503 if (Store->isTruncatingStore())
8504 return SDValue();
8505
8506 if (Store->getAlign() < Subtarget.getZilsdAlign())
8507 return SDValue();
8508
8509 SDLoc DL(Op);
8510 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8511 DAG.getTargetConstant(0, DL, MVT::i32));
8512 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, StoredVal,
8513 DAG.getTargetConstant(1, DL, MVT::i32));
8514
8515 return DAG.getMemIntrinsicNode(
8516 RISCVISD::SD_RV32, DL, DAG.getVTList(MVT::Other),
8517 {Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
8518 Store->getMemOperand());
8519 }
8520
8521 if (VT == MVT::bf16)
8522 return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8523
8524 // Handle normal vector tuple store.
8525 if (VT.isRISCVVectorTuple()) {
8526 SDLoc DL(Op);
8527 MVT XLenVT = Subtarget.getXLenVT();
8528 unsigned NF = VT.getRISCVVectorTupleNumFields();
8529 unsigned Sz = VT.getSizeInBits().getKnownMinValue();
8530 unsigned NumElts = Sz / (NF * 8);
8531 int Log2LMUL = Log2_64(NumElts) - 3;
8532
8533 auto Flag = SDNodeFlags();
8534 Flag.setNoUnsignedWrap(true);
8535 SDValue Ret;
8536 SDValue Chain = Store->getChain();
8537 SDValue BasePtr = Store->getBasePtr();
8538 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
8539 VROffset =
8540 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
8541 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
8542
8543 // Extract subregisters in a vector tuple and store them individually.
8544 for (unsigned i = 0; i < NF; ++i) {
8545 auto Extract =
8546 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
8547 MVT::getScalableVectorVT(MVT::i8, NumElts), StoredVal,
8548 DAG.getTargetConstant(i, DL, MVT::i32));
8549 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
8550 MachinePointerInfo(Store->getAddressSpace()),
8551 Store->getBaseAlign(),
8552 Store->getMemOperand()->getFlags());
8553 Chain = Ret.getValue(0);
8554 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
8555 }
8556 return Ret;
8557 }
8558
8559 if (auto V = expandUnalignedRVVStore(Op, DAG))
8560 return V;
8561 if (Op.getOperand(1).getValueType().isFixedLengthVector())
8562 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
8563 return Op;
8564 }
8565 case ISD::VP_LOAD:
8566 if (SDValue V = expandUnalignedVPLoad(Op, DAG))
8567 return V;
8568 [[fallthrough]];
8569 case ISD::MLOAD:
8570 return lowerMaskedLoad(Op, DAG);
8571 case ISD::VP_LOAD_FF:
8572 return lowerLoadFF(Op, DAG);
8573 case ISD::VP_STORE:
8574 if (SDValue V = expandUnalignedVPStore(Op, DAG))
8575 return V;
8576 [[fallthrough]];
8577 case ISD::MSTORE:
8578 return lowerMaskedStore(Op, DAG);
8579 case ISD::VECTOR_COMPRESS:
8580 return lowerVectorCompress(Op, DAG);
8581 case ISD::SELECT_CC: {
8582 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
8583 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
8584 // into separate SETCC+SELECT just like LegalizeDAG.
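// e.g. (select_cc lhs, rhs, cc, t, f) becomes
// (select (setcc lhs, rhs, cc), t, f).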
8585 SDValue Tmp1 = Op.getOperand(0);
8586 SDValue Tmp2 = Op.getOperand(1);
8587 SDValue True = Op.getOperand(2);
8588 SDValue False = Op.getOperand(3);
8589 EVT VT = Op.getValueType();
8590 SDValue CC = Op.getOperand(4);
8591 EVT CmpVT = Tmp1.getValueType();
8592 EVT CCVT =
8593 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
8594 SDLoc DL(Op);
8595 SDValue Cond =
8596 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
8597 return DAG.getSelect(DL, VT, Cond, True, False);
8598 }
8599 case ISD::SETCC: {
8600 MVT OpVT = Op.getOperand(0).getSimpleValueType();
8601 if (OpVT.isScalarInteger()) {
8602 MVT VT = Op.getSimpleValueType();
8603 SDValue LHS = Op.getOperand(0);
8604 SDValue RHS = Op.getOperand(1);
8605 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
8606 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
8607 "Unexpected CondCode");
8608
8609 SDLoc DL(Op);
8610
8611 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
8612 // convert this to the equivalent of (set(u)ge X, C+1) by using
8613 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
8614 // in a register.
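// For example, (setgt X, 5) becomes (xori (slti X, 6), 1) and
// (setugt X, 5) becomes (xori (sltiu X, 6), 1).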
8615 if (isa<ConstantSDNode>(RHS)) {
8616 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
8617 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
8618 // If this is an unsigned compare and the constant is -1, incrementing
8619 // the constant would change behavior. The result should be false.
8620 if (CCVal == ISD::SETUGT && Imm == -1)
8621 return DAG.getConstant(0, DL, VT);
8622 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
8623 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8624 SDValue SetCC = DAG.getSetCC(
8625 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
8626 return DAG.getLogicalNOT(DL, SetCC, VT);
8627 }
8628 // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
8629 if (CCVal == ISD::SETUGT && Imm == 2047) {
8630 SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
8631 DAG.getShiftAmountConstant(11, OpVT, DL));
8632 return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
8633 ISD::SETNE);
8634 }
8635 }
8636
8637 // Not a constant we could handle, swap the operands and condition code to
8638 // SETLT/SETULT.
8639 CCVal = ISD::getSetCCSwappedOperands(CCVal);
8640 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
8641 }
8642
8643 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8644 return SplitVectorOp(Op, DAG);
8645
8646 return lowerToScalableOp(Op, DAG);
8647 }
8648 case ISD::ADD:
8649 case ISD::SUB:
8650 case ISD::MUL:
8651 case ISD::MULHS:
8652 case ISD::MULHU:
8653 case ISD::AND:
8654 case ISD::OR:
8655 case ISD::XOR:
8656 case ISD::SDIV:
8657 case ISD::SREM:
8658 case ISD::UDIV:
8659 case ISD::UREM:
8660 case ISD::BSWAP:
8661 case ISD::CTPOP:
8662 case ISD::VSELECT:
8663 return lowerToScalableOp(Op, DAG);
8664 case ISD::SHL:
8665 case ISD::SRL:
8666 case ISD::SRA:
8667 if (Op.getSimpleValueType().isFixedLengthVector()) {
8668 if (Subtarget.enablePExtCodeGen()) {
8669 // We have patterns for scalar/immediate shift amount, so no lowering
8670 // needed.
8671 if (Op.getOperand(1)->getOpcode() == ISD::SPLAT_VECTOR)
8672 return Op;
8673
8674 // There's no vector-vector form of the shift instructions in the P
8675 // extension, so we need to unroll to scalar shifts and pack the results
8676 // back up.
8676 return DAG.UnrollVectorOp(Op.getNode());
8677 }
8678 return lowerToScalableOp(Op, DAG);
8679 }
8680 // This can be called for an i32 shift amount that needs to be promoted.
8681 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
8682 "Unexpected custom legalisation");
8683 return SDValue();
8684 case ISD::FABS:
8685 case ISD::FNEG:
8686 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8687 return lowerFABSorFNEG(Op, DAG, Subtarget);
8688 [[fallthrough]];
8689 case ISD::FADD:
8690 case ISD::FSUB:
8691 case ISD::FMUL:
8692 case ISD::FDIV:
8693 case ISD::FSQRT:
8694 case ISD::FMA:
8695 case ISD::FMINNUM:
8696 case ISD::FMAXNUM:
8697 case ISD::FMINIMUMNUM:
8698 case ISD::FMAXIMUMNUM:
8699 if (isPromotedOpNeedingSplit(Op, Subtarget))
8700 return SplitVectorOp(Op, DAG);
8701 [[fallthrough]];
8702 case ISD::AVGFLOORS:
8703 case ISD::AVGFLOORU:
8704 case ISD::AVGCEILS:
8705 case ISD::AVGCEILU:
8706 case ISD::SMIN:
8707 case ISD::SMAX:
8708 case ISD::UMIN:
8709 case ISD::UMAX:
8710 case ISD::UADDSAT:
8711 case ISD::USUBSAT:
8712 case ISD::SADDSAT:
8713 case ISD::SSUBSAT:
8714 return lowerToScalableOp(Op, DAG);
8715 case ISD::ABDS:
8716 case ISD::ABDU: {
8717 SDLoc dl(Op);
8718 EVT VT = Op->getValueType(0);
8719 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
8720 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
8721 bool IsSigned = Op->getOpcode() == ISD::ABDS;
8722
8723 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
8724 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
8725 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
8726 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
8727 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
8728 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
8729 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
8730 }
8731 case ISD::ABS:
8732 case ISD::VP_ABS:
8733 return lowerABS(Op, DAG);
8734 case ISD::CTLZ:
8735 case ISD::CTLZ_ZERO_UNDEF:
8736 case ISD::CTTZ:
8737 case ISD::CTTZ_ZERO_UNDEF:
8738 if (Subtarget.hasStdExtZvbb())
8739 return lowerToScalableOp(Op, DAG);
8740 assert(Op.getOpcode() != ISD::CTTZ);
8741 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8742 case ISD::FCOPYSIGN:
8743 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
8744 return lowerFCOPYSIGN(Op, DAG, Subtarget);
8745 if (isPromotedOpNeedingSplit(Op, Subtarget))
8746 return SplitVectorOp(Op, DAG);
8747 return lowerToScalableOp(Op, DAG);
8748 case ISD::STRICT_FADD:
8749 case ISD::STRICT_FSUB:
8750 case ISD::STRICT_FMUL:
8751 case ISD::STRICT_FDIV:
8752 case ISD::STRICT_FSQRT:
8753 case ISD::STRICT_FMA:
8754 if (isPromotedOpNeedingSplit(Op, Subtarget))
8755 return SplitStrictFPVectorOp(Op, DAG);
8756 return lowerToScalableOp(Op, DAG);
8757 case ISD::STRICT_FSETCC:
8758 case ISD::STRICT_FSETCCS:
8759 return lowerVectorStrictFSetcc(Op, DAG);
8760 case ISD::STRICT_FCEIL:
8761 case ISD::STRICT_FRINT:
8762 case ISD::STRICT_FFLOOR:
8763 case ISD::STRICT_FTRUNC:
8764 case ISD::STRICT_FNEARBYINT:
8765 case ISD::STRICT_FROUND:
8766 case ISD::STRICT_FROUNDEVEN:
8767 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8768 case ISD::MGATHER:
8769 case ISD::VP_GATHER:
8770 return lowerMaskedGather(Op, DAG);
8771 case ISD::MSCATTER:
8772 case ISD::VP_SCATTER:
8773 return lowerMaskedScatter(Op, DAG);
8774 case ISD::GET_ROUNDING:
8775 return lowerGET_ROUNDING(Op, DAG);
8776 case ISD::SET_ROUNDING:
8777 return lowerSET_ROUNDING(Op, DAG);
8778 case ISD::GET_FPENV:
8779 return lowerGET_FPENV(Op, DAG);
8780 case ISD::SET_FPENV:
8781 return lowerSET_FPENV(Op, DAG);
8782 case ISD::RESET_FPENV:
8783 return lowerRESET_FPENV(Op, DAG);
8784 case ISD::GET_FPMODE:
8785 return lowerGET_FPMODE(Op, DAG);
8786 case ISD::SET_FPMODE:
8787 return lowerSET_FPMODE(Op, DAG);
8788 case ISD::RESET_FPMODE:
8789 return lowerRESET_FPMODE(Op, DAG);
8790 case ISD::EH_DWARF_CFA:
8791 return lowerEH_DWARF_CFA(Op, DAG);
8792 case ISD::VP_MERGE:
8793 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
8794 return lowerVPMergeMask(Op, DAG);
8795 [[fallthrough]];
8796 case ISD::VP_SELECT:
8797 case ISD::VP_ADD:
8798 case ISD::VP_SUB:
8799 case ISD::VP_MUL:
8800 case ISD::VP_SDIV:
8801 case ISD::VP_UDIV:
8802 case ISD::VP_SREM:
8803 case ISD::VP_UREM:
8804 case ISD::VP_UADDSAT:
8805 case ISD::VP_USUBSAT:
8806 case ISD::VP_SADDSAT:
8807 case ISD::VP_SSUBSAT:
8808 case ISD::VP_LRINT:
8809 case ISD::VP_LLRINT:
8810 return lowerVPOp(Op, DAG);
8811 case ISD::VP_AND:
8812 case ISD::VP_OR:
8813 case ISD::VP_XOR:
8814 return lowerLogicVPOp(Op, DAG);
8815 case ISD::VP_FADD:
8816 case ISD::VP_FSUB:
8817 case ISD::VP_FMUL:
8818 case ISD::VP_FDIV:
8819 case ISD::VP_FNEG:
8820 case ISD::VP_FABS:
8821 case ISD::VP_SQRT:
8822 case ISD::VP_FMA:
8823 case ISD::VP_FMINNUM:
8824 case ISD::VP_FMAXNUM:
8825 case ISD::VP_FCOPYSIGN:
8826 if (isPromotedOpNeedingSplit(Op, Subtarget))
8827 return SplitVPOp(Op, DAG);
8828 [[fallthrough]];
8829 case ISD::VP_SRA:
8830 case ISD::VP_SRL:
8831 case ISD::VP_SHL:
8832 return lowerVPOp(Op, DAG);
8833 case ISD::VP_IS_FPCLASS:
8834 return LowerIS_FPCLASS(Op, DAG);
8835 case ISD::VP_SIGN_EXTEND:
8836 case ISD::VP_ZERO_EXTEND:
8837 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8838 return lowerVPExtMaskOp(Op, DAG);
8839 return lowerVPOp(Op, DAG);
8840 case ISD::VP_TRUNCATE:
8841 return lowerVectorTruncLike(Op, DAG);
8842 case ISD::VP_FP_EXTEND:
8843 case ISD::VP_FP_ROUND:
8844 return lowerVectorFPExtendOrRoundLike(Op, DAG);
8845 case ISD::VP_SINT_TO_FP:
8846 case ISD::VP_UINT_TO_FP:
8847 if (Op.getValueType().isVector() &&
8848 ((Op.getValueType().getScalarType() == MVT::f16 &&
8849 (Subtarget.hasVInstructionsF16Minimal() &&
8850 !Subtarget.hasVInstructionsF16())) ||
8851 Op.getValueType().getScalarType() == MVT::bf16)) {
8852 if (isPromotedOpNeedingSplit(Op, Subtarget))
8853 return SplitVectorOp(Op, DAG);
8854 // int -> f32
8855 SDLoc DL(Op);
8856 MVT NVT =
8857 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
8858 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
8859 // f32 -> [b]f16
8860 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
8861 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
8862 }
8863 [[fallthrough]];
8864 case ISD::VP_FP_TO_SINT:
8865 case ISD::VP_FP_TO_UINT:
8866 if (SDValue Op1 = Op.getOperand(0);
8867 Op1.getValueType().isVector() &&
8868 ((Op1.getValueType().getScalarType() == MVT::f16 &&
8869 (Subtarget.hasVInstructionsF16Minimal() &&
8870 !Subtarget.hasVInstructionsF16())) ||
8871 Op1.getValueType().getScalarType() == MVT::bf16)) {
8872 if (isPromotedOpNeedingSplit(Op1, Subtarget))
8873 return SplitVectorOp(Op, DAG);
8874 // [b]f16 -> f32
8875 SDLoc DL(Op);
8876 MVT NVT = MVT::getVectorVT(MVT::f32,
8877 Op1.getValueType().getVectorElementCount());
8878 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
8879 // f32 -> int
8880 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
8881 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
8882 }
8883 return lowerVPFPIntConvOp(Op, DAG);
8884 case ISD::VP_SETCC:
8885 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
8886 return SplitVPOp(Op, DAG);
8887 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
8888 return lowerVPSetCCMaskOp(Op, DAG);
8889 [[fallthrough]];
8890 case ISD::VP_SMIN:
8891 case ISD::VP_SMAX:
8892 case ISD::VP_UMIN:
8893 case ISD::VP_UMAX:
8894 case ISD::VP_BITREVERSE:
8895 case ISD::VP_BSWAP:
8896 return lowerVPOp(Op, DAG);
8897 case ISD::VP_CTLZ:
8898 case ISD::VP_CTLZ_ZERO_UNDEF:
8899 if (Subtarget.hasStdExtZvbb())
8900 return lowerVPOp(Op, DAG);
8901 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8902 case ISD::VP_CTTZ:
8903 case ISD::VP_CTTZ_ZERO_UNDEF:
8904 if (Subtarget.hasStdExtZvbb())
8905 return lowerVPOp(Op, DAG);
8906 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
8907 case ISD::VP_CTPOP:
8908 return lowerVPOp(Op, DAG);
8909 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
8910 return lowerVPStridedLoad(Op, DAG);
8911 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
8912 return lowerVPStridedStore(Op, DAG);
8913 case ISD::VP_FCEIL:
8914 case ISD::VP_FFLOOR:
8915 case ISD::VP_FRINT:
8916 case ISD::VP_FNEARBYINT:
8917 case ISD::VP_FROUND:
8918 case ISD::VP_FROUNDEVEN:
8919 case ISD::VP_FROUNDTOZERO:
8920 if (isPromotedOpNeedingSplit(Op, Subtarget))
8921 return SplitVPOp(Op, DAG);
8922 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
8923 case ISD::VP_FMAXIMUM:
8924 case ISD::VP_FMINIMUM:
8925 if (isPromotedOpNeedingSplit(Op, Subtarget))
8926 return SplitVPOp(Op, DAG);
8927 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
8928 case ISD::EXPERIMENTAL_VP_SPLICE:
8929 return lowerVPSpliceExperimental(Op, DAG);
8930 case ISD::EXPERIMENTAL_VP_REVERSE:
8931 return lowerVPReverseExperimental(Op, DAG);
8932 case ISD::CLEAR_CACHE: {
8933 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
8934 "llvm.clear_cache only needs custom lower on Linux targets");
8935 SDLoc DL(Op);
8936 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8937 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
8938 Op.getOperand(2), Flags, DL);
8939 }
8940 case ISD::DYNAMIC_STACKALLOC:
8941 return lowerDYNAMIC_STACKALLOC(Op, DAG);
8942 case ISD::INIT_TRAMPOLINE:
8943 return lowerINIT_TRAMPOLINE(Op, DAG);
8944 case ISD::ADJUST_TRAMPOLINE:
8945 return lowerADJUST_TRAMPOLINE(Op, DAG);
8946 case ISD::PARTIAL_REDUCE_SMLA:
8947 case ISD::PARTIAL_REDUCE_UMLA:
8948 case ISD::PARTIAL_REDUCE_SUMLA:
8949 return lowerPARTIAL_REDUCE_MLA(Op, DAG);
8950 }
8951}
8952
8953SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
8954 SDValue Start, SDValue End,
8955 SDValue Flags, SDLoc DL) const {
8956 MakeLibCallOptions CallOptions;
8957 std::pair<SDValue, SDValue> CallResult =
8958 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
8959 {Start, End, Flags}, CallOptions, DL, InChain);
8960
8961 // This function returns void so only the out chain matters.
8962 return CallResult.second;
8963}
8964
8965SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
8966 SelectionDAG &DAG) const {
8967 if (!Subtarget.is64Bit())
8968 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
8969
8970 // Create an MCCodeEmitter to encode instructions.
8971 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
8972 assert(TLO);
8973 MCContext &MCCtx = TLO->getContext();
8974
8975 std::unique_ptr<MCCodeEmitter> CodeEmitter(
8976 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
8977
8978 SDValue Root = Op.getOperand(0);
8979 SDValue Trmp = Op.getOperand(1); // trampoline
8980 SDLoc dl(Op);
8981
8982 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
8983
8984 // We store in the trampoline buffer the following instructions and data.
8985 // Offset:
8986 // 0: auipc t2, 0
8987 // 4: ld t0, 24(t2)
8988 // 8: ld t2, 16(t2)
8989 // 12: jalr t0
8990 // 16: <StaticChainOffset>
8991 // 24: <FunctionAddressOffset>
8992 // 32:
8993 // Offset with branch control flow protection enabled:
8994 // 0: lpad <imm20>
8995 // 4: auipc t3, 0
8996 // 8: ld t2, 28(t3)
8997 // 12: ld t3, 20(t3)
8998 // 16: jalr t2
8999 // 20: <StaticChainOffset>
9000 // 28: <FunctionAddressOffset>
9001 // 36:
9002
9003 const bool HasCFBranch =
9004 Subtarget.hasStdExtZicfilp() &&
9006 "cf-protection-branch");
9007 const unsigned StaticChainIdx = HasCFBranch ? 5 : 4;
9008 const unsigned StaticChainOffset = StaticChainIdx * 4;
9009 const unsigned FunctionAddressOffset = StaticChainOffset + 8;
9010
9011 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
9012 assert(STI);
9013 auto GetEncoding = [&](const MCInst &MC) {
9014 SmallVector<char, 32> CB;
9015 SmallVector<MCFixup> Fixups;
9016 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
9017 uint32_t Encoding = support::endian::read32le(CB.data());
9018 return Encoding;
9019 };
9020
9021 SmallVector<SDValue> OutChains;
9022
9023 SmallVector<uint32_t> Encodings;
9024 if (!HasCFBranch) {
9025 Encodings.append(
9026 {// auipc t2, 0
9027 // Loads the current PC into t2.
9028 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
9029 // ld t0, 24(t2)
9030 // Loads the function address into t0. Note that we are using offsets
9031 // pc-relative to the first instruction of the trampoline.
9032 GetEncoding(MCInstBuilder(RISCV::LD)
9033 .addReg(RISCV::X5)
9034 .addReg(RISCV::X7)
9035 .addImm(FunctionAddressOffset)),
9036 // ld t2, 16(t2)
9037 // Load the value of the static chain.
9038 GetEncoding(MCInstBuilder(RISCV::LD)
9039 .addReg(RISCV::X7)
9040 .addReg(RISCV::X7)
9041 .addImm(StaticChainOffset)),
9042 // jalr t0
9043 // Jump to the function.
9044 GetEncoding(MCInstBuilder(RISCV::JALR)
9045 .addReg(RISCV::X0)
9046 .addReg(RISCV::X5)
9047 .addImm(0))});
9048 } else {
9049 Encodings.append(
9050 {// auipc x0, <imm20> (lpad <imm20>)
9051 // Landing pad.
9052 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X0).addImm(0)),
9053 // auipc t3, 0
9054 // Loads the current PC into t3.
9055 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X28).addImm(0)),
9056 // ld t2, (FunctionAddressOffset - 4)(t3)
9057 // Loads the function address into t2. Note that we are using offsets
9058 // pc-relative to the SECOND instruction of the trampoline.
9059 GetEncoding(MCInstBuilder(RISCV::LD)
9060 .addReg(RISCV::X7)
9061 .addReg(RISCV::X28)
9062 .addImm(FunctionAddressOffset - 4)),
9063 // ld t3, (StaticChainOffset - 4)(t3)
9064 // Load the value of the static chain.
9065 GetEncoding(MCInstBuilder(RISCV::LD)
9066 .addReg(RISCV::X28)
9067 .addReg(RISCV::X28)
9068 .addImm(StaticChainOffset - 4)),
9069 // jalr t2
9070 // Software-guarded jump to the function.
9071 GetEncoding(MCInstBuilder(RISCV::JALR)
9072 .addReg(RISCV::X0)
9073 .addReg(RISCV::X7)
9074 .addImm(0))});
9075 }
9076
9077 // Store encoded instructions.
9078 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
9079 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
9080 DAG.getConstant(Idx * 4, dl, MVT::i64))
9081 : Trmp;
9082 OutChains.push_back(DAG.getTruncStore(
9083 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
9084 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32));
9085 }
9086
9087 // Now store the variable part of the trampoline.
9088 SDValue FunctionAddress = Op.getOperand(2);
9089 SDValue StaticChain = Op.getOperand(3);
9090
9091 // Store the given static chain and function pointer in the trampoline buffer.
9092 struct OffsetValuePair {
9093 const unsigned Offset;
9094 const SDValue Value;
9095 SDValue Addr = SDValue(); // Used to cache the address.
9096 } OffsetValues[] = {
9097 {StaticChainOffset, StaticChain},
9098 {FunctionAddressOffset, FunctionAddress},
9099 };
9100 for (auto &OffsetValue : OffsetValues) {
9101 SDValue Addr =
9102 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
9103 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
9104 OffsetValue.Addr = Addr;
9105 OutChains.push_back(
9106 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
9107 MachinePointerInfo(TrmpAddr, OffsetValue.Offset)));
9108 }
9109
9110 assert(OutChains.size() == StaticChainIdx + 2 &&
9111 "Size of OutChains mismatch");
9112 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
9113
9114 // The end of the trampoline's instructions is the same as the static chain
9115 // address that we computed earlier.
9116 SDValue EndOfTrmp = OffsetValues[0].Addr;
9117
9118 // Call clear cache on the trampoline instructions.
9119 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
9120 Trmp, EndOfTrmp);
9121
9122 return Chain;
9123}
9124
9125SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
9126 SelectionDAG &DAG) const {
9127 if (!Subtarget.is64Bit())
9128 llvm::reportFatalUsageError("Trampolines only implemented for RV64");
9129
9130 return Op.getOperand(0);
9131}
9132
9133SDValue RISCVTargetLowering::lowerPARTIAL_REDUCE_MLA(SDValue Op,
9134 SelectionDAG &DAG) const {
9135 // Currently, only the vqdot and vqdotu case (from zvqdotq) should be legal.
9136 // TODO: There are many other sub-cases we could potentially lower, are
9137 // any of them worthwhile? Ex: via vredsum, vwredsum, vwwmaccu, etc..
9138 SDLoc DL(Op);
9139 MVT VT = Op.getSimpleValueType();
9140 SDValue Accum = Op.getOperand(0);
9141 assert(Accum.getSimpleValueType() == VT &&
9142 VT.getVectorElementType() == MVT::i32);
9143 SDValue A = Op.getOperand(1);
9144 SDValue B = Op.getOperand(2);
9145 MVT ArgVT = A.getSimpleValueType();
9146 assert(ArgVT == B.getSimpleValueType() &&
9147 ArgVT.getVectorElementType() == MVT::i8);
9148 (void)ArgVT;
9149
9150 // The zvqdotq pseudos are defined with sources and destination both
9151 // being i32. This cast is needed for correctness to avoid incorrect
9152 // .vx matching of i8 splats.
9153 A = DAG.getBitcast(VT, A);
9154 B = DAG.getBitcast(VT, B);
9155
9156 MVT ContainerVT = VT;
9157 if (VT.isFixedLengthVector()) {
9158 ContainerVT = getContainerForFixedLengthVector(VT);
9159 Accum = convertToScalableVector(ContainerVT, Accum, DAG, Subtarget);
9160 A = convertToScalableVector(ContainerVT, A, DAG, Subtarget);
9161 B = convertToScalableVector(ContainerVT, B, DAG, Subtarget);
9162 }
9163
9164 unsigned Opc;
9165 switch (Op.getOpcode()) {
9166 case ISD::PARTIAL_REDUCE_SMLA:
9167 Opc = RISCVISD::VQDOT_VL;
9168 break;
9169 case ISD::PARTIAL_REDUCE_UMLA:
9170 Opc = RISCVISD::VQDOTU_VL;
9171 break;
9172 case ISD::PARTIAL_REDUCE_SUMLA:
9173 Opc = RISCVISD::VQDOTSU_VL;
9174 break;
9175 default:
9176 llvm_unreachable("Unexpected opcode");
9177 }
9178 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
9179 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, {A, B, Accum, Mask, VL});
9180 if (VT.isFixedLengthVector())
9181 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
9182 return Res;
9183}
9184
9185 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
9186 SelectionDAG &DAG, unsigned Flags) {
9187 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
9188}
9189
9190 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
9191 SelectionDAG &DAG, unsigned Flags) {
9192 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
9193 Flags);
9194}
9195
9196 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
9197 SelectionDAG &DAG, unsigned Flags) {
9198 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
9199 N->getOffset(), Flags);
9200}
9201
9202 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
9203 SelectionDAG &DAG, unsigned Flags) {
9204 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
9205}
9206
9207 static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
9208 EVT Ty, SelectionDAG &DAG) {
9209 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
9210 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
9211 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
9212 return DAG.getLoad(
9213 Ty, DL, DAG.getEntryNode(), LC,
9214 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
9215}
9216
9217 static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
9218 EVT Ty, SelectionDAG &DAG) {
9219 RISCVConstantPoolValue *CPV =
9220 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
9221 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
9222 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
9223 return DAG.getLoad(
9224 Ty, DL, DAG.getEntryNode(), LC,
9225 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
9226}
9227
9228template <class NodeTy>
9229SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
9230 bool IsLocal, bool IsExternWeak) const {
9231 SDLoc DL(N);
9232 EVT Ty = getPointerTy(DAG.getDataLayout());
9233
9234 // When HWASAN is used and tagging of global variables is enabled,
9235 // they should be accessed via the GOT, since the tagged address of a global
9236 // is incompatible with existing code models. This also applies to non-pic
9237 // mode.
9238 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
9239 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9240 if (IsLocal && !Subtarget.allowTaggedGlobals())
9241 // Use PC-relative addressing to access the symbol. This generates the
9242 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
9243 // %pcrel_lo(auipc)).
9244 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9245
9246 // Use PC-relative addressing to access the GOT for this symbol, then load
9247 // the address from the GOT. This generates the pattern (PseudoLGA sym),
9248 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
9249 SDValue Load =
9250 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
9251 MachineFunction &MF = DAG.getMachineFunction();
9252 MachineMemOperand *MemOp = MF.getMachineMemOperand(
9256 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9257 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9258 return Load;
9259 }
9260
9261 switch (getTargetMachine().getCodeModel()) {
9262 default:
9263 reportFatalUsageError("Unsupported code model for lowering");
9264 case CodeModel::Small: {
9265 // Generate a sequence for accessing addresses within the first 2 GiB of
9266 // address space.
9267 if (Subtarget.hasVendorXqcili()) {
9268 // Use QC.E.LI to generate the address, as this is easier to relax than
9269 // LUI/ADDI.
9270 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9271 return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
9272 }
9273
9274 // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
9275 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
9276 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
9277 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9278 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
9279 }
9280 case CodeModel::Medium: {
9281 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9282 if (IsExternWeak) {
9283 // An extern weak symbol may be undefined, i.e. have value 0, which may
9284 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
9285 // symbol. This generates the pattern (PseudoLGA sym), which expands to
9286 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
9287 SDValue Load =
9288 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
9289 MachineFunction &MF = DAG.getMachineFunction();
9290 MachineMemOperand *MemOp = MF.getMachineMemOperand(
9294 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9295 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9296 return Load;
9297 }
9298
9299 // Generate a sequence for accessing addresses within any 2GiB range within
9300 // the address space. This generates the pattern (PseudoLLA sym), which
9301 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
9302 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9303 }
9304 case CodeModel::Large: {
9305 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
9306 return getLargeGlobalAddress(G, DL, Ty, DAG);
9307
9308 // Use PC-relative addressing for other node types.
9309 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
9310 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
9311 }
9312 }
9313}
9314
9315SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
9316 SelectionDAG &DAG) const {
9317 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9318 assert(N->getOffset() == 0 && "unexpected offset in global node");
9319 const GlobalValue *GV = N->getGlobal();
9320 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
9321}
9322
9323SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
9324 SelectionDAG &DAG) const {
9325 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
9326
9327 return getAddr(N, DAG);
9328}
9329
9330SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
9331 SelectionDAG &DAG) const {
9332 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
9333
9334 return getAddr(N, DAG);
9335}
9336
9337SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
9338 SelectionDAG &DAG) const {
9339 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
9340
9341 return getAddr(N, DAG);
9342}
9343
9344SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
9345 SelectionDAG &DAG,
9346 bool UseGOT) const {
9347 SDLoc DL(N);
9348 EVT Ty = getPointerTy(DAG.getDataLayout());
9349 const GlobalValue *GV = N->getGlobal();
9350 MVT XLenVT = Subtarget.getXLenVT();
9351
9352 if (UseGOT) {
9353 // Use PC-relative addressing to access the GOT for this TLS symbol, then
9354 // load the address from the GOT and add the thread pointer. This generates
9355 // the pattern (PseudoLA_TLS_IE sym), which expands to
9356 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
9357 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9358 SDValue Load =
9359 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
9360 MachineFunction &MF = DAG.getMachineFunction();
9361 MachineMemOperand *MemOp = MF.getMachineMemOperand(
9365 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
9366 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
9367
9368 // Add the thread pointer.
9369 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9370 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
9371 }
9372
9373 // Generate a sequence for accessing the address relative to the thread
9374 // pointer, with the appropriate adjustment for the thread pointer offset.
9375 // This generates the pattern
9376 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
9377 SDValue AddrHi =
9378 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
9379 SDValue AddrAdd =
9380 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
9381 SDValue AddrLo =
9382 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
9383
9384 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
9385 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
9386 SDValue MNAdd =
9387 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
9388 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
9389}
9390
9391SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
9392 SelectionDAG &DAG) const {
9393 SDLoc DL(N);
9394 EVT Ty = getPointerTy(DAG.getDataLayout());
9395 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
9396 const GlobalValue *GV = N->getGlobal();
9397
9398 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9399 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
9400 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
9401 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9402 SDValue Load =
9403 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
9404
9405 // Prepare argument list to generate call.
9406 ArgListTy Args;
9407 Args.emplace_back(Load, CallTy);
9408
9409 // Setup call to __tls_get_addr.
9410 TargetLowering::CallLoweringInfo CLI(DAG);
9411 CLI.setDebugLoc(DL)
9412 .setChain(DAG.getEntryNode())
9413 .setLibCallee(CallingConv::C, CallTy,
9414 DAG.getExternalSymbol("__tls_get_addr", Ty),
9415 std::move(Args));
9416
9417 return LowerCallTo(CLI).first;
9418}
9419
9420SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
9421 SelectionDAG &DAG) const {
9422 SDLoc DL(N);
9423 EVT Ty = getPointerTy(DAG.getDataLayout());
9424 const GlobalValue *GV = N->getGlobal();
9425
9426 // Use a PC-relative addressing mode to access the global dynamic GOT address.
9427 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
9428 //
9429 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
9430 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
9431 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
9432 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
9433 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
9434 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
9435}
9436
9437SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
9438 SelectionDAG &DAG) const {
9439 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
9440 assert(N->getOffset() == 0 && "unexpected offset in global node");
9441
9442 if (DAG.getTarget().useEmulatedTLS())
9443 return LowerToTLSEmulatedModel(N, DAG);
9444
9444
9445 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
9446
9447 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
9448 CallingConv::GHC)
9449 reportFatalUsageError("In GHC calling convention TLS is not supported");
9450
9451 SDValue Addr;
9452 switch (Model) {
9453 case TLSModel::LocalExec:
9454 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
9455 break;
9456 case TLSModel::InitialExec:
9457 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
9458 break;
9459 case TLSModel::LocalDynamic:
9460 case TLSModel::GeneralDynamic:
9461 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
9462 : getDynamicTLSAddr(N, DAG);
9463 break;
9464 }
9465
9466 return Addr;
9467}
9468
9469// Return true if Val is equal to (setcc LHS, RHS, CC).
9470// Return false if Val is the inverse of (setcc LHS, RHS, CC).
9471// Otherwise, return std::nullopt.
9472static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
9473 ISD::CondCode CC, SDValue Val) {
9474 assert(Val->getOpcode() == ISD::SETCC);
9475 SDValue LHS2 = Val.getOperand(0);
9476 SDValue RHS2 = Val.getOperand(1);
9477 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
9478
9479 if (LHS == LHS2 && RHS == RHS2) {
9480 if (CC == CC2)
9481 return true;
9482 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9483 return false;
9484 } else if (LHS == RHS2 && RHS == LHS2) {
9485 CC2 = ISD::getSetCCSwappedOperands(CC2);
9486 if (CC == CC2)
9487 return true;
9488 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
9489 return false;
9490 }
9491
9492 return std::nullopt;
9493}
9494
9495 static bool isSimm12Constant(SDValue V) {
9496 return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
9497}
9498
9499 static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
9500 const RISCVSubtarget &Subtarget) {
9501 SDValue CondV = N->getOperand(0);
9502 SDValue TrueV = N->getOperand(1);
9503 SDValue FalseV = N->getOperand(2);
9504 MVT VT = N->getSimpleValueType(0);
9505 SDLoc DL(N);
9506
9507 if (!Subtarget.hasConditionalMoveFusion()) {
9508 // (select c, -1, y) -> -c | y
9509 if (isAllOnesConstant(TrueV)) {
9510 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9511 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
9512 }
9513 // (select c, y, -1) -> (c-1) | y
9514 if (isAllOnesConstant(FalseV)) {
9515 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
9516 DAG.getAllOnesConstant(DL, VT));
9517 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
9518 }
9519
9520 const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
9521
9522 // (select c, 0, y) -> (c-1) & y
9523 if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
9524 SDValue Neg =
9525 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
9526 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
9527 }
9528 if (isNullConstant(FalseV)) {
9529 // (select c, (1 << ShAmount) + 1, 0) -> (c << ShAmount) + c
9530 if (auto *TrueC = dyn_cast<ConstantSDNode>(TrueV)) {
9531 uint64_t TrueM1 = TrueC->getZExtValue() - 1;
9532 if (isPowerOf2_64(TrueM1)) {
9533 unsigned ShAmount = Log2_64(TrueM1);
9534 if (Subtarget.hasShlAdd(ShAmount))
9535 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, CondV,
9536 DAG.getTargetConstant(ShAmount, DL, VT), CondV);
9537 }
9538 }
9539 // (select c, y, 0) -> -c & y
9540 if (!HasCZero || isSimm12Constant(TrueV)) {
9541 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9542 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
9543 }
9544 }
9545 }
9546
9547 // select c, ~x, x --> xor -c, x
9548 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9549 const APInt &TrueVal = TrueV->getAsAPIntVal();
9550 const APInt &FalseVal = FalseV->getAsAPIntVal();
9551 if (~TrueVal == FalseVal) {
9552 SDValue Neg = DAG.getNegative(CondV, DL, VT);
9553 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
9554 }
9555 }
9556
9557 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
9558 // when both truev and falsev are also setcc.
9559 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
9560 FalseV.getOpcode() == ISD::SETCC) {
9561 SDValue LHS = CondV.getOperand(0);
9562 SDValue RHS = CondV.getOperand(1);
9563 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9564
9565 // (select x, x, y) -> x | y
9566 // (select !x, x, y) -> x & y
9567 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
9568 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
9569 DAG.getFreeze(FalseV));
9570 }
9571 // (select x, y, x) -> x & y
9572 // (select !x, y, x) -> x | y
9573 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
9574 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
9575 DAG.getFreeze(TrueV), FalseV);
9576 }
9577 }
9578
9579 return SDValue();
9580}
9581
9582// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
9583// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
9584 // For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
9585// being `0` or `-1`. In such cases we can replace `select` with `and`.
9586// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
9587// than `c0`?
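// For example, (xor (select cond, x, -1), -1) becomes
// (select cond, (xor x, -1), 0), whose zero arm can then be lowered with
// AND/CZERO instead of a branch.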
9588static SDValue
9589 foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
9590 const RISCVSubtarget &Subtarget) {
9591 if (Subtarget.hasShortForwardBranchIALU())
9592 return SDValue();
9593
9594 unsigned SelOpNo = 0;
9595 SDValue Sel = BO->getOperand(0);
9596 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
9597 SelOpNo = 1;
9598 Sel = BO->getOperand(1);
9599 }
9600
9601 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
9602 return SDValue();
9603
9604 unsigned ConstSelOpNo = 1;
9605 unsigned OtherSelOpNo = 2;
9606 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
9607 ConstSelOpNo = 2;
9608 OtherSelOpNo = 1;
9609 }
9610 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
9611 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
9612 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
9613 return SDValue();
9614
9615 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
9616 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
9617 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
9618 return SDValue();
9619
9620 SDLoc DL(Sel);
9621 EVT VT = BO->getValueType(0);
9622
9623 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
9624 if (SelOpNo == 1)
9625 std::swap(NewConstOps[0], NewConstOps[1]);
9626
9627 SDValue NewConstOp =
9628 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
9629 if (!NewConstOp)
9630 return SDValue();
9631
9632 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
9633 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
9634 return SDValue();
9635
9636 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
9637 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
9638 if (SelOpNo == 1)
9639 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
9640 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
9641
9642 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
9643 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
9644 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
9645}
9646
9647SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
9648 SDValue CondV = Op.getOperand(0);
9649 SDValue TrueV = Op.getOperand(1);
9650 SDValue FalseV = Op.getOperand(2);
9651 SDLoc DL(Op);
9652 MVT VT = Op.getSimpleValueType();
9653 MVT XLenVT = Subtarget.getXLenVT();
9654
9655 // Lower vector SELECTs to VSELECTs by splatting the condition.
9656 if (VT.isVector()) {
9657 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
9658 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
9659 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
9660 }
9661
9662 // Try some other optimizations before falling back to generic lowering.
9663 if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
9664 return V;
9665
9666 // When there is no cost for GPR <-> FPR moves, we can use a Zicond select
9667 // for floating-point values when CondV is an integer type.
9668 bool FPinGPR = Subtarget.hasStdExtZfinx();
9669
9670 // We can handle the FP value in a GPR without splitting it into hi/lo parts.
9671 bool FitsInGPR = TypeSize::isKnownLE(VT.getSizeInBits(),
9672 Subtarget.getXLenVT().getSizeInBits());
9673
9674 bool UseZicondForFPSel = Subtarget.hasStdExtZicond() && FPinGPR &&
9675 VT.isFloatingPoint() && FitsInGPR;
9676
9677 if (UseZicondForFPSel) {
9678
9679 auto CastToInt = [&](SDValue V) -> SDValue {
9680 // Treat +0.0 as int 0 to enable single 'czero' instruction generation.
9681 if (isNullFPConstant(V))
9682 return DAG.getConstant(0, DL, XLenVT);
9683
9684 if (VT == MVT::f16)
9685 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, V);
9686
9687 if (VT == MVT::f32 && Subtarget.is64Bit())
9688 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, V);
9689
9690 return DAG.getBitcast(XLenVT, V);
9691 };
9692
9693 SDValue TrueVInt = CastToInt(TrueV);
9694 SDValue FalseVInt = CastToInt(FalseV);
9695
9696 // Emit integer SELECT (lowers to Zicond)
9697 SDValue ResultInt =
9698 DAG.getNode(ISD::SELECT, DL, XLenVT, CondV, TrueVInt, FalseVInt);
9699
9700 // Convert back to floating VT
9701 if (VT == MVT::f32 && Subtarget.is64Bit())
9702 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, VT, ResultInt);
9703
9704 if (VT == MVT::f16)
9705 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, ResultInt);
9706
9707 return DAG.getBitcast(VT, ResultInt);
9708 }
9709
9710 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
9711 // nodes to implement the SELECT. Performing the lowering here allows for
9712 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
9713 // sequence or RISCVISD::SELECT_CC node (branch-based select).
9714 if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
9715
9716 // (select c, t, 0) -> (czero_eqz t, c)
9717 if (isNullConstant(FalseV))
9718 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
9719 // (select c, 0, f) -> (czero_nez f, c)
9720 if (isNullConstant(TrueV))
9721 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
9722
9723 // Check to see if a given operation is a 'NOT'; if so, return the negated
9724 // operand.
9725 auto getNotOperand = [](const SDValue &Op) -> std::optional<const SDValue> {
9726 using namespace llvm::SDPatternMatch;
9727 SDValue Xor;
9728 if (sd_match(Op, m_OneUse(m_Not(m_Value(Xor))))) {
9729 return Xor;
9730 }
9731 return std::nullopt;
9732 };
9733 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
9734 // (select c, (and f, ~x), f) -> (andn f, (czero_eqz x, c))
9735 if (TrueV.getOpcode() == ISD::AND &&
9736 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV)) {
9737 auto NotOperand = (TrueV.getOperand(0) == FalseV)
9738 ? getNotOperand(TrueV.getOperand(1))
9739 : getNotOperand(TrueV.getOperand(0));
9740 if (NotOperand) {
9741 SDValue CMOV =
9742 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, *NotOperand, CondV);
9743 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9744 return DAG.getNode(ISD::AND, DL, VT, FalseV, NOT);
9745 }
9746 return DAG.getNode(
9747 ISD::OR, DL, VT, TrueV,
9748 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
9749 }
9750
9751 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
9752 // (select c, t, (and t, ~x)) -> (andn t, (czero_nez x, c))
9753 if (FalseV.getOpcode() == ISD::AND &&
9754 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV)) {
9755 auto NotOperand = (FalseV.getOperand(0) == TrueV)
9756 ? getNotOperand(FalseV.getOperand(1))
9757 : getNotOperand(FalseV.getOperand(0));
9758 if (NotOperand) {
9759 SDValue CMOV =
9760 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, *NotOperand, CondV);
9761 SDValue NOT = DAG.getNOT(DL, CMOV, VT);
9762 return DAG.getNode(ISD::AND, DL, VT, TrueV, NOT);
9763 }
9764 return DAG.getNode(
9765 ISD::OR, DL, VT, FalseV,
9766 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
9767 }
9768
9769 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
9770 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
9771 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
9772 const APInt &TrueVal = TrueV->getAsAPIntVal();
9773 const APInt &FalseVal = FalseV->getAsAPIntVal();
9774
9775 // Prefer these over Zicond to avoid materializing an immediate:
9776 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
9777 // (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
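// e.g. on RV64, (select (setlt x, 0), 3, 7) becomes
// (add (and (sra x, 63), -4), 7).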
9778 if (CondV.getOpcode() == ISD::SETCC &&
9779 CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
9780 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9781 if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
9782 (CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
9783 int64_t TrueImm = TrueVal.getSExtValue();
9784 int64_t FalseImm = FalseVal.getSExtValue();
9785 if (CCVal == ISD::SETGT)
9786 std::swap(TrueImm, FalseImm);
9787 if (isInt<12>(TrueImm) && isInt<12>(FalseImm) &&
9788 isInt<12>(TrueImm - FalseImm)) {
9789 SDValue SRA =
9790 DAG.getNode(ISD::SRA, DL, VT, CondV.getOperand(0),
9791 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
9792 SDValue AND =
9793 DAG.getNode(ISD::AND, DL, VT, SRA,
9794 DAG.getSignedConstant(TrueImm - FalseImm, DL, VT));
9795 return DAG.getNode(ISD::ADD, DL, VT, AND,
9796 DAG.getSignedConstant(FalseImm, DL, VT));
9797 }
9798 }
9799 }
9800
9801       // Use SHL/ADDI (and possibly XORI) to avoid having to materialize
9802       // a constant in a register.
9803 if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
9804 SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
9805 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9806 return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
9807 }
9808 if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
9809 SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
9810 CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
9811 SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
9812 return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
9813 }
9814
9815 auto getCost = [&](const APInt &Delta, const APInt &Addend) {
9816 const int DeltaCost = RISCVMatInt::getIntMatCost(
9817 Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9818 // Does the addend fold into an ADDI
9819 if (Addend.isSignedIntN(12))
9820 return DeltaCost;
9821 const int AddendCost = RISCVMatInt::getIntMatCost(
9822 Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
9823 return AddendCost + DeltaCost;
9824 };
9825 bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
9826 getCost(TrueVal - FalseVal, FalseVal);
9827 SDValue LHSVal = DAG.getConstant(
9828 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
9829 SDValue CMOV =
9830 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9831 DL, VT, LHSVal, CondV);
9832 return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
9833 }
9834
9835 // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
9836 // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
9837 if (isa<ConstantSDNode>(TrueV) != isa<ConstantSDNode>(FalseV)) {
9838 bool IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
9839 SDValue ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
9840 SDValue RegV = IsCZERO_NEZ ? FalseV : TrueV;
9841 int64_t RawConstVal = cast<ConstantSDNode>(ConstVal)->getSExtValue();
9842       // Efficient only if the constant and its negation fit into `ADDI`.
9843       // Prefer Add/Sub over Xor since they can be compressed for small immediates.
9844 if (isInt<12>(RawConstVal)) {
9845 // Fall back to XORI if Const == -0x800 since we don't have SUBI.
9846 unsigned SubOpc = (RawConstVal == -0x800) ? ISD::XOR : ISD::SUB;
9847 unsigned AddOpc = (RawConstVal == -0x800) ? ISD::XOR : ISD::ADD;
9848 SDValue SubOp = DAG.getNode(SubOpc, DL, VT, RegV, ConstVal);
9849 SDValue CZERO =
9850 DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
9851 DL, VT, SubOp, CondV);
9852 return DAG.getNode(AddOpc, DL, VT, CZERO, ConstVal);
9853 }
9854 }
9855
9856 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
9857 // Unless we have the short forward branch optimization.
9858 if (!Subtarget.hasConditionalMoveFusion())
9859 return DAG.getNode(
9860 ISD::OR, DL, VT,
9861 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
9862           DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
9863           SDNodeFlags::Disjoint);
9864 }
9865
9866 if (Op.hasOneUse()) {
9867 unsigned UseOpc = Op->user_begin()->getOpcode();
9868 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
9869 SDNode *BinOp = *Op->user_begin();
9870 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
9871 DAG, Subtarget)) {
9872 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
9873 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
9874       // may return a constant node and cause a crash in lowerSELECT.
9875 if (NewSel.getOpcode() == ISD::SELECT)
9876 return lowerSELECT(NewSel, DAG);
9877 return NewSel;
9878 }
9879 }
9880 }
9881
9882 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
9883 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
9884 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
9885 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
9886 if (FPTV && FPFV) {
9887 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
9888 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
9889 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
9890 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
9891 DAG.getConstant(1, DL, XLenVT));
9892 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
9893 }
9894 }
9895
9896 // If the condition is not an integer SETCC which operates on XLenVT, we need
9897 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
9898 // (select condv, truev, falsev)
9899 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
9900 if (CondV.getOpcode() != ISD::SETCC ||
9901 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
9902 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
9903 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
9904
9905 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
9906
9907 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9908 }
9909
9910 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
9911 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
9912 // advantage of the integer compare+branch instructions. i.e.:
9913 // (select (setcc lhs, rhs, cc), truev, falsev)
9914 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
9915 SDValue LHS = CondV.getOperand(0);
9916 SDValue RHS = CondV.getOperand(1);
9917 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9918
9919 // Special case for a select of 2 constants that have a difference of 1.
9920 // Normally this is done by DAGCombine, but if the select is introduced by
9921 // type legalization or op legalization, we miss it. Restricting to SETLT
9922 // case for now because that is what signed saturating add/sub need.
9923 // FIXME: We don't need the condition to be SETLT or even a SETCC,
9924 // but we would probably want to swap the true/false values if the condition
9925 // is SETGE/SETLE to avoid an XORI.
9926 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
9927 CCVal == ISD::SETLT) {
9928 const APInt &TrueVal = TrueV->getAsAPIntVal();
9929 const APInt &FalseVal = FalseV->getAsAPIntVal();
9930 if (TrueVal - 1 == FalseVal)
9931 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
9932 if (TrueVal + 1 == FalseVal)
9933 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
9934 }
9935
9936 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9937 // 1 < x ? x : 1 -> 0 < x ? x : 1
9938 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
9939 RHS == TrueV && LHS == FalseV) {
9940 LHS = DAG.getConstant(0, DL, VT);
9941 // 0 <u x is the same as x != 0.
9942 if (CCVal == ISD::SETULT) {
9943 std::swap(LHS, RHS);
9944 CCVal = ISD::SETNE;
9945 }
9946 }
9947
9948 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
9949 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
9950 RHS == FalseV) {
9951 RHS = DAG.getConstant(0, DL, VT);
9952 }
9953
9954 SDValue TargetCC = DAG.getCondCode(CCVal);
9955
9956 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
9957 // (select (setcc lhs, rhs, CC), constant, falsev)
9958 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
9959 std::swap(TrueV, FalseV);
9960 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
9961 }
9962
9963 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
9964 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
9965}
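
// Illustrative sketch (not part of the LLVM source): the Zicond-style select
// lowering above, modelled on plain 64-bit integers. czeroEqz/czeroNez mirror
// the czero.eqz/czero.nez semantics (zero the value when the condition is
// zero / non-zero). All names in this example are local illustrations only.
namespace czero_select_example {
inline unsigned long long czeroEqz(unsigned long long Val,
                                   unsigned long long Cond) {
  return Cond != 0 ? Val : 0;
}
inline unsigned long long czeroNez(unsigned long long Val,
                                   unsigned long long Cond) {
  return Cond == 0 ? Val : 0;
}
// (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
inline unsigned long long selectViaCzero(unsigned long long C,
                                         unsigned long long T,
                                         unsigned long long F) {
  return czeroEqz(T, C) | czeroNez(F, C);
}
// (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1); only the constant
// difference has to be materialized in a register.
inline unsigned long long selectTwoConstants(unsigned long long C,
                                             unsigned long long C1,
                                             unsigned long long C2) {
  return czeroNez(C2 - C1, C) + C1;
}
} // namespace czero_select_example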
9966
9967SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
9968 SDValue CondV = Op.getOperand(1);
9969 SDLoc DL(Op);
9970 MVT XLenVT = Subtarget.getXLenVT();
9971
9972 if (CondV.getOpcode() == ISD::SETCC &&
9973 CondV.getOperand(0).getValueType() == XLenVT) {
9974 SDValue LHS = CondV.getOperand(0);
9975 SDValue RHS = CondV.getOperand(1);
9976 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
9977
9978 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
9979
9980 SDValue TargetCC = DAG.getCondCode(CCVal);
9981 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9982 LHS, RHS, TargetCC, Op.getOperand(2));
9983 }
9984
9985 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
9986 CondV, DAG.getConstant(0, DL, XLenVT),
9987 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
9988}
9989
9990SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
9991 MachineFunction &MF = DAG.getMachineFunction();
9992 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
9993
9994 SDLoc DL(Op);
9995   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
9996                                  getPointerTy(MF.getDataLayout()));
9997
9998 // vastart just stores the address of the VarArgsFrameIndex slot into the
9999 // memory location argument.
10000 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
10001 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
10002 MachinePointerInfo(SV));
10003}
10004
10005SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
10006 SelectionDAG &DAG) const {
10007 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
10008 MachineFunction &MF = DAG.getMachineFunction();
10009 MachineFrameInfo &MFI = MF.getFrameInfo();
10010 MFI.setFrameAddressIsTaken(true);
10011 Register FrameReg = RI.getFrameRegister(MF);
10012 int XLenInBytes = Subtarget.getXLen() / 8;
10013
10014 EVT VT = Op.getValueType();
10015 SDLoc DL(Op);
10016 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
10017 unsigned Depth = Op.getConstantOperandVal(0);
10018 while (Depth--) {
10019 int Offset = -(XLenInBytes * 2);
10020 SDValue Ptr = DAG.getNode(
10021         ISD::ADD, DL, VT, FrameAddr,
10022         DAG.getSignedConstant(Offset, DL, VT));
10023 FrameAddr =
10024 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
10025 }
10026 return FrameAddr;
10027}
10028
10029SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
10030 SelectionDAG &DAG) const {
10031 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
10032 MachineFunction &MF = DAG.getMachineFunction();
10033 MachineFrameInfo &MFI = MF.getFrameInfo();
10034 MFI.setReturnAddressIsTaken(true);
10035 MVT XLenVT = Subtarget.getXLenVT();
10036 int XLenInBytes = Subtarget.getXLen() / 8;
10037
10038 EVT VT = Op.getValueType();
10039 SDLoc DL(Op);
10040 unsigned Depth = Op.getConstantOperandVal(0);
10041 if (Depth) {
10042 int Off = -XLenInBytes;
10043 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
10044 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
10045 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
10046 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
10047 MachinePointerInfo());
10048 }
10049
10050 // Return the value of the return address register, marking it an implicit
10051 // live-in.
10052 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
10053 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
10054}
10055
10056SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
10057 SelectionDAG &DAG) const {
10058 SDLoc DL(Op);
10059 SDValue Lo = Op.getOperand(0);
10060 SDValue Hi = Op.getOperand(1);
10061 SDValue Shamt = Op.getOperand(2);
10062 EVT VT = Lo.getValueType();
10063
10064 // if Shamt-XLEN < 0: // Shamt < XLEN
10065 // Lo = Lo << Shamt
10066 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
10067 // else:
10068 // Lo = 0
10069 // Hi = Lo << (Shamt-XLEN)
10070
10071 SDValue Zero = DAG.getConstant(0, DL, VT);
10072 SDValue One = DAG.getConstant(1, DL, VT);
10073 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
10074 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
10075 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
10076 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
10077
10078 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
10079 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
10080 SDValue ShiftRightLo =
10081 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
10082 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
10083 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
10084 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
10085
10086 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
10087
10088 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
10089 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
10090
10091 SDValue Parts[2] = {Lo, Hi};
10092 return DAG.getMergeValues(Parts, DL);
10093}
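
// Illustrative sketch (not part of the LLVM source): the shift-left-parts
// expansion above, written out for a pair of 32-bit halves (i.e. XLEN == 32,
// shifting a 64-bit value). Shamt is assumed to be in [0, 63]. The
// (Lo >> 1) >> (31 - Shamt) form avoids an out-of-range 32-bit shift when
// Shamt == 0, exactly like the XLenMinus1Shamt node above.
static void shiftLeftParts32(unsigned Lo, unsigned Hi, unsigned Shamt,
                             unsigned &OutLo, unsigned &OutHi) {
  if (Shamt < 32) {
    OutLo = Lo << Shamt;
    OutHi = (Hi << Shamt) | ((Lo >> 1) >> (31 - Shamt));
  } else {
    OutLo = 0;
    OutHi = Lo << (Shamt - 32);
  }
}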
10094
10095SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
10096 bool IsSRA) const {
10097 SDLoc DL(Op);
10098 SDValue Lo = Op.getOperand(0);
10099 SDValue Hi = Op.getOperand(1);
10100 SDValue Shamt = Op.getOperand(2);
10101 EVT VT = Lo.getValueType();
10102
10103 // SRA expansion:
10104 // if Shamt-XLEN < 0: // Shamt < XLEN
10105 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
10106 // Hi = Hi >>s Shamt
10107 // else:
10108 // Lo = Hi >>s (Shamt-XLEN);
10109 // Hi = Hi >>s (XLEN-1)
10110 //
10111 // SRL expansion:
10112 // if Shamt-XLEN < 0: // Shamt < XLEN
10113 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
10114 // Hi = Hi >>u Shamt
10115 // else:
10116 // Lo = Hi >>u (Shamt-XLEN);
10117 // Hi = 0;
10118
10119 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
10120
10121 SDValue Zero = DAG.getConstant(0, DL, VT);
10122 SDValue One = DAG.getConstant(1, DL, VT);
10123 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
10124 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
10125 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
10126 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
10127
10128 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
10129 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
10130 SDValue ShiftLeftHi =
10131 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
10132 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
10133 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
10134 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
10135 SDValue HiFalse =
10136 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
10137
10138 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
10139
10140 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
10141 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
10142
10143 SDValue Parts[2] = {Lo, Hi};
10144 return DAG.getMergeValues(Parts, DL);
10145}
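
// Illustrative sketch (not part of the LLVM source): the shift-right-parts
// expansion above for 32-bit halves, with IsSRA selecting the arithmetic vs.
// logical variant. Shamt is assumed to be in [0, 63]; right shifts of negative
// values are taken to be arithmetic, as on RISC-V.
static void shiftRightParts32(unsigned Lo, unsigned Hi, unsigned Shamt,
                              bool IsSRA, unsigned &OutLo, unsigned &OutHi) {
  auto Sra = [](unsigned V, unsigned Amt) {
    return static_cast<unsigned>(static_cast<int>(V) >> Amt);
  };
  if (Shamt < 32) {
    OutLo = (Lo >> Shamt) | ((Hi << 1) << (31 - Shamt));
    OutHi = IsSRA ? Sra(Hi, Shamt) : (Hi >> Shamt);
  } else {
    OutLo = IsSRA ? Sra(Hi, Shamt - 32) : (Hi >> (Shamt - 32));
    OutHi = IsSRA ? Sra(Hi, 31) : 0;
  }
}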
10146
10147// Lower splats of i1 types to SETCC. For each mask vector type, we have a
10148// legal equivalently-sized i8 type, so we can use that as a go-between.
10149SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
10150 SelectionDAG &DAG) const {
10151 SDLoc DL(Op);
10152 MVT VT = Op.getSimpleValueType();
10153 SDValue SplatVal = Op.getOperand(0);
10154 // All-zeros or all-ones splats are handled specially.
10155 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
10156 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
10157 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
10158 }
10159 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
10160 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
10161 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
10162 }
10163 MVT InterVT = VT.changeVectorElementType(MVT::i8);
10164 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
10165 DAG.getConstant(1, DL, SplatVal.getValueType()));
10166 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
10167 SDValue Zero = DAG.getConstant(0, DL, InterVT);
10168 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
10169}
10170
10171// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
10172// illegal (currently only vXi64 RV32).
10173// FIXME: We could also catch non-constant sign-extended i32 values and lower
10174// them to VMV_V_X_VL.
10175SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
10176 SelectionDAG &DAG) const {
10177 SDLoc DL(Op);
10178 MVT VecVT = Op.getSimpleValueType();
10179 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
10180 "Unexpected SPLAT_VECTOR_PARTS lowering");
10181
10182 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
10183 SDValue Lo = Op.getOperand(0);
10184 SDValue Hi = Op.getOperand(1);
10185
10186 MVT ContainerVT = VecVT;
10187 if (VecVT.isFixedLengthVector())
10188 ContainerVT = getContainerForFixedLengthVector(VecVT);
10189
10190 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
10191
10192 SDValue Res =
10193 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
10194
10195 if (VecVT.isFixedLengthVector())
10196 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
10197
10198 return Res;
10199}
10200
10201// Custom-lower extensions from mask vectors by using a vselect either with 1
10202// for zero/any-extension or -1 for sign-extension:
10203// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
10204// Note that any-extension is lowered identically to zero-extension.
10205SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
10206 int64_t ExtTrueVal) const {
10207 SDLoc DL(Op);
10208 MVT VecVT = Op.getSimpleValueType();
10209 SDValue Src = Op.getOperand(0);
10210 // Only custom-lower extensions from mask types
10211 assert(Src.getValueType().isVector() &&
10212 Src.getValueType().getVectorElementType() == MVT::i1);
10213
10214 if (VecVT.isScalableVector()) {
10215 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
10216 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
10217 if (Src.getOpcode() == ISD::XOR &&
10218 ISD::isConstantSplatVectorAllOnes(Src.getOperand(1).getNode()))
10219 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src.getOperand(0), SplatZero,
10220 SplatTrueVal);
10221 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
10222 }
10223
10224 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
10225 MVT I1ContainerVT =
10226 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
10227
10228 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
10229
10230 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
10231
10232 MVT XLenVT = Subtarget.getXLenVT();
10233 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
10234 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
10235
10236 if (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10237 SDValue Xor = Src.getOperand(0);
10238 if (Xor.getOpcode() == RISCVISD::VMXOR_VL) {
10239 SDValue ScalableOnes = Xor.getOperand(1);
10240 if (ScalableOnes.getOpcode() == ISD::INSERT_SUBVECTOR &&
10241           ScalableOnes.getOperand(0).isUndef() &&
10242           ISD::isConstantSplatVectorAllOnes(
10243 ScalableOnes.getOperand(1).getNode())) {
10244 CC = Xor.getOperand(0);
10245 std::swap(SplatZero, SplatTrueVal);
10246 }
10247 }
10248 }
10249
10250 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10251 DAG.getUNDEF(ContainerVT), SplatZero, VL);
10252 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10253 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
10254 SDValue Select =
10255 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
10256 SplatZero, DAG.getUNDEF(ContainerVT), VL);
10257
10258 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
10259}
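
// Illustrative sketch (not part of the LLVM source): the per-element effect of
// the mask-extension lowering above. Sign-extension passes ExtTrueVal == -1,
// zero/any-extension passes ExtTrueVal == 1; the vselect/vmerge then splats
// that value wherever the mask bit is set and 0 elsewhere.
static long long extendMaskElement(bool MaskBit, long long ExtTrueVal) {
  return MaskBit ? ExtTrueVal : 0;
}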
10260
10261// Custom-lower truncations from vectors to mask vectors by using a mask and a
10262// setcc operation:
10263// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
10264SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
10265 SelectionDAG &DAG) const {
10266 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
10267 SDLoc DL(Op);
10268 EVT MaskVT = Op.getValueType();
10269 // Only expect to custom-lower truncations to mask types
10270 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
10271 "Unexpected type for vector mask lowering");
10272 SDValue Src = Op.getOperand(0);
10273 MVT VecVT = Src.getSimpleValueType();
10274 SDValue Mask, VL;
10275 if (IsVPTrunc) {
10276 Mask = Op.getOperand(1);
10277 VL = Op.getOperand(2);
10278 }
10279 // If this is a fixed vector, we need to convert it to a scalable vector.
10280 MVT ContainerVT = VecVT;
10281
10282 if (VecVT.isFixedLengthVector()) {
10283 ContainerVT = getContainerForFixedLengthVector(VecVT);
10284 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
10285 if (IsVPTrunc) {
10286 MVT MaskContainerVT =
10287 getContainerForFixedLengthVector(Mask.getSimpleValueType());
10288 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
10289 }
10290 }
10291
10292 if (!IsVPTrunc) {
10293 std::tie(Mask, VL) =
10294 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10295 }
10296
10297 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
10298 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
10299
10300 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10301 DAG.getUNDEF(ContainerVT), SplatOne, VL);
10302 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
10303 DAG.getUNDEF(ContainerVT), SplatZero, VL);
10304
10305 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
10306 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
10307 DAG.getUNDEF(ContainerVT), Mask, VL);
10308 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
10309 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
10310 DAG.getUNDEF(MaskContainerVT), Mask, VL});
10311 if (MaskVT.isFixedLengthVector())
10312 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
10313 return Trunc;
10314}
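
// Illustrative sketch (not part of the LLVM source): the per-element effect of
// the truncate-to-mask lowering above; only bit 0 of each element survives.
static bool truncElementToMask(unsigned long long Elt) {
  return (Elt & 1) != 0; // (and vec, 1) followed by a setne against zero
}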
10315
10316SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
10317 SelectionDAG &DAG) const {
10318 unsigned Opc = Op.getOpcode();
10319 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
10320 SDLoc DL(Op);
10321
10322 MVT VT = Op.getSimpleValueType();
10323 // Only custom-lower vector truncates
10324 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10325
10326 // Truncates to mask types are handled differently
10327 if (VT.getVectorElementType() == MVT::i1)
10328 return lowerVectorMaskTruncLike(Op, DAG);
10329
10330 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
10331 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
10332 // truncate by one power of two at a time.
10333 MVT DstEltVT = VT.getVectorElementType();
10334
10335 SDValue Src = Op.getOperand(0);
10336 MVT SrcVT = Src.getSimpleValueType();
10337 MVT SrcEltVT = SrcVT.getVectorElementType();
10338
10339 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
10340 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
10341 "Unexpected vector truncate lowering");
10342
10343 MVT ContainerVT = SrcVT;
10344 SDValue Mask, VL;
10345 if (IsVPTrunc) {
10346 Mask = Op.getOperand(1);
10347 VL = Op.getOperand(2);
10348 }
10349 if (SrcVT.isFixedLengthVector()) {
10350 ContainerVT = getContainerForFixedLengthVector(SrcVT);
10351 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
10352 if (IsVPTrunc) {
10353 MVT MaskVT = getMaskTypeFor(ContainerVT);
10354 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10355 }
10356 }
10357
10358 SDValue Result = Src;
10359 if (!IsVPTrunc) {
10360 std::tie(Mask, VL) =
10361 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10362 }
10363
10364   unsigned NewOpc;
10365   if (Opc == ISD::TRUNCATE_SSAT_S)
10366 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
10367 else if (Opc == ISD::TRUNCATE_USAT_U)
10368 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
10369 else
10370 NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
10371
10372 do {
10373 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
10374 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
10375 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
10376 } while (SrcEltVT != DstEltVT);
10377
10378 if (SrcVT.isFixedLengthVector())
10379 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
10380
10381 return Result;
10382}
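
// Illustrative sketch (not part of the LLVM source): the loop above narrows
// the element type one power of two at a time because RVV only truncates from
// SEW*2 to SEW. For example, an i64 -> i8 truncate becomes three
// TRUNCATE_VECTOR_VL nodes: 64 -> 32 -> 16 -> 8.
static unsigned countVectorTruncSteps(unsigned SrcEltBits, unsigned DstEltBits) {
  unsigned Steps = 0;
  while (SrcEltBits != DstEltBits) {
    SrcEltBits /= 2; // one RISCVISD::TRUNCATE_VECTOR_VL per halving
    ++Steps;
  }
  return Steps;
}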
10383
10384SDValue
10385RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
10386 SelectionDAG &DAG) const {
10387 SDLoc DL(Op);
10388 SDValue Chain = Op.getOperand(0);
10389 SDValue Src = Op.getOperand(1);
10390 MVT VT = Op.getSimpleValueType();
10391 MVT SrcVT = Src.getSimpleValueType();
10392 MVT ContainerVT = VT;
10393 if (VT.isFixedLengthVector()) {
10394 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10395 ContainerVT =
10396 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10397 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10398 }
10399
10400 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10401
10402   // RVV can only widen/truncate fp to types double or half the size of the source.
10403 if ((VT.getVectorElementType() == MVT::f64 &&
10404 (SrcVT.getVectorElementType() == MVT::f16 ||
10405 SrcVT.getVectorElementType() == MVT::bf16)) ||
10406 ((VT.getVectorElementType() == MVT::f16 ||
10407 VT.getVectorElementType() == MVT::bf16) &&
10408 SrcVT.getVectorElementType() == MVT::f64)) {
10409 // For double rounding, the intermediate rounding should be round-to-odd.
10410 unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10411 ? RISCVISD::STRICT_FP_EXTEND_VL
10412 : RISCVISD::STRICT_VFNCVT_ROD_VL;
10413 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10414 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
10415 Chain, Src, Mask, VL);
10416 Chain = Src.getValue(1);
10417 }
10418
10419 unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
10420 ? RISCVISD::STRICT_FP_EXTEND_VL
10421 : RISCVISD::STRICT_FP_ROUND_VL;
10422 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
10423 Chain, Src, Mask, VL);
10424 if (VT.isFixedLengthVector()) {
10425 // StrictFP operations have two result values. Their lowered result should
10426     // have the same result count.
10427 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
10428 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
10429 }
10430 return Res;
10431}
10432
10433SDValue
10434RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
10435 SelectionDAG &DAG) const {
10436 bool IsVP =
10437 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
10438 bool IsExtend =
10439 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
10440   // RVV can only truncate fp to types half the size of the source. We
10441 // custom-lower f64->f16 rounds via RVV's round-to-odd float
10442 // conversion instruction.
10443 SDLoc DL(Op);
10444 MVT VT = Op.getSimpleValueType();
10445
10446 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
10447
10448 SDValue Src = Op.getOperand(0);
10449 MVT SrcVT = Src.getSimpleValueType();
10450
10451 bool IsDirectExtend =
10452 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
10453 (SrcVT.getVectorElementType() != MVT::f16 &&
10454 SrcVT.getVectorElementType() != MVT::bf16));
10455 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
10456 VT.getVectorElementType() != MVT::bf16) ||
10457 SrcVT.getVectorElementType() != MVT::f64);
10458
10459 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
10460
10461 // We have regular SD node patterns for direct non-VL extends.
10462 if (VT.isScalableVector() && IsDirectConv && !IsVP)
10463 return Op;
10464
10465 // Prepare any fixed-length vector operands.
10466 MVT ContainerVT = VT;
10467 SDValue Mask, VL;
10468 if (IsVP) {
10469 Mask = Op.getOperand(1);
10470 VL = Op.getOperand(2);
10471 }
10472 if (VT.isFixedLengthVector()) {
10473 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
10474 ContainerVT =
10475 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
10476 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
10477 if (IsVP) {
10478 MVT MaskVT = getMaskTypeFor(ContainerVT);
10479 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
10480 }
10481 }
10482
10483 if (!IsVP)
10484 std::tie(Mask, VL) =
10485 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
10486
10487 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
10488
10489 if (IsDirectConv) {
10490 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
10491 if (VT.isFixedLengthVector())
10492 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
10493 return Src;
10494 }
10495
10496 unsigned InterConvOpc =
10497 IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
10498
10499 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
10500 SDValue IntermediateConv =
10501 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
10502 SDValue Result =
10503 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
10504 if (VT.isFixedLengthVector())
10505 return convertFromScalableVector(VT, Result, DAG, Subtarget);
10506 return Result;
10507}
10508
10509// Given a scalable vector type and an index into it, returns the type for the
10510// smallest subvector that the index fits in. This can be used to reduce LMUL
10511// for operations like vslidedown.
10512//
10513// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
10514static std::optional<MVT>
10515getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
10516 const RISCVSubtarget &Subtarget) {
10517 assert(VecVT.isScalableVector());
10518 const unsigned EltSize = VecVT.getScalarSizeInBits();
10519 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
10520 const unsigned MinVLMAX = VectorBitsMin / EltSize;
10521 MVT SmallerVT;
10522 if (MaxIdx < MinVLMAX)
10523 SmallerVT = RISCVTargetLowering::getM1VT(VecVT);
10524 else if (MaxIdx < MinVLMAX * 2)
10525     SmallerVT =
10526         RISCVTargetLowering::getM1VT(VecVT).getDoubleNumVectorElementsVT();
10527 else if (MaxIdx < MinVLMAX * 4)
10528     SmallerVT = RISCVTargetLowering::getM1VT(VecVT)
10529                     .getDoubleNumVectorElementsVT()
10530                     .getDoubleNumVectorElementsVT();
10531 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
10532 return std::nullopt;
10533 return SmallerVT;
10534}
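
// Illustrative sketch (not part of the LLVM source): the arithmetic behind
// getSmallestVTForIndex. With Zvl128b (RealMinVLen == 128) and 32-bit
// elements, MinVLMAX == 4, so index 3 fits in LMUL 1, index 5 in LMUL 2,
// index 13 in LMUL 4, and larger indices keep the original type.
static unsigned smallestLMULForIndex(unsigned MaxIdx, unsigned EltSizeBits,
                                     unsigned RealMinVLen) {
  unsigned MinVLMAX = RealMinVLen / EltSizeBits;
  if (MaxIdx < MinVLMAX)
    return 1;
  if (MaxIdx < MinVLMAX * 2)
    return 2;
  if (MaxIdx < MinVLMAX * 4)
    return 4;
  return 0; // no smaller type, mirroring the std::nullopt result above
}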
10535
10536 static bool isValidVisniInsertExtractIndex(SDValue Idx) {
10537   auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10538 if (!IdxC || isNullConstant(Idx))
10539 return false;
10540 return isUInt<5>(IdxC->getZExtValue());
10541}
10542
10543// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
10544// first position of a vector, and that vector is slid up to the insert index.
10545// By limiting the active vector length to index+1 and merging with the
10546// original vector (with an undisturbed tail policy for elements >= VL), we
10547// achieve the desired result of leaving all elements untouched except the one
10548// at VL-1, which is replaced with the desired value.
10549SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
10550 SelectionDAG &DAG) const {
10551 SDLoc DL(Op);
10552 MVT VecVT = Op.getSimpleValueType();
10553 MVT XLenVT = Subtarget.getXLenVT();
10554 SDValue Vec = Op.getOperand(0);
10555 SDValue Val = Op.getOperand(1);
10556 MVT ValVT = Val.getSimpleValueType();
10557 SDValue Idx = Op.getOperand(2);
10558
10559 if (VecVT.getVectorElementType() == MVT::i1) {
10560 // FIXME: For now we just promote to an i8 vector and insert into that,
10561 // but this is probably not optimal.
10562 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10563 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10564 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
10565 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
10566 }
10567
10568 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10569 (ValVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
10570 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
10571 MVT IntVT = VecVT.changeTypeToInteger();
10572 SDValue IntInsert = DAG.getNode(
10573 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
10574 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
10575 return DAG.getBitcast(VecVT, IntInsert);
10576 }
10577
10578 MVT ContainerVT = VecVT;
10579 // If the operand is a fixed-length vector, convert to a scalable one.
10580 if (VecVT.isFixedLengthVector()) {
10581 ContainerVT = getContainerForFixedLengthVector(VecVT);
10582 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10583 }
10584
10585 // If we know the index we're going to insert at, we can shrink Vec so that
10586 // we're performing the scalar inserts and slideup on a smaller LMUL.
10587 SDValue OrigVec = Vec;
10588 std::optional<unsigned> AlignedIdx;
10589 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
10590 const unsigned OrigIdx = IdxC->getZExtValue();
10591 // Do we know an upper bound on LMUL?
10592 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
10593 DL, DAG, Subtarget)) {
10594 ContainerVT = *ShrunkVT;
10595 AlignedIdx = 0;
10596 }
10597
10598 // If we're compiling for an exact VLEN value, we can always perform
10599 // the insert in m1 as we can determine the register corresponding to
10600 // the index in the register group.
10601 const MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10602 if (auto VLEN = Subtarget.getRealVLen(); VLEN && ContainerVT.bitsGT(M1VT)) {
10603 EVT ElemVT = VecVT.getVectorElementType();
10604 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
10605 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10606 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10607 AlignedIdx = SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10608 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10609 ContainerVT = M1VT;
10610 }
10611
10612 if (AlignedIdx)
10613 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, *AlignedIdx);
10614 }
10615
10616 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
10617 // Even i64-element vectors on RV32 can be lowered without scalar
10618 // legalization if the most-significant 32 bits of the value are not affected
10619 // by the sign-extension of the lower 32 bits.
10620 // TODO: We could also catch sign extensions of a 32-bit value.
10621 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
10622 const auto *CVal = cast<ConstantSDNode>(Val);
10623 if (isInt<32>(CVal->getSExtValue())) {
10624 IsLegalInsert = true;
10625 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
10626 }
10627 }
10628
10629 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10630
10631 SDValue ValInVec;
10632
10633 if (IsLegalInsert) {
10634 unsigned Opc =
10635 VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
10636 if (isNullConstant(Idx)) {
10637 if (!VecVT.isFloatingPoint())
10638 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
10639 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
10640
10641 if (AlignedIdx)
10642 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10643 if (!VecVT.isFixedLengthVector())
10644 return Vec;
10645 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10646 }
10647
10648 // Use ri.vinsert.v.x if available.
10649     if (Subtarget.hasVendorXRivosVisni() && VecVT.isInteger() &&
10650         isValidVisniInsertExtractIndex(Idx)) {
10651 // Tail policy applies to elements past VLMAX (by assumption Idx < VLMAX)
10652       SDValue PolicyOp =
10653           DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
10654 Vec = DAG.getNode(RISCVISD::RI_VINSERT_VL, DL, ContainerVT, Vec, Val, Idx,
10655 VL, PolicyOp);
10656 if (AlignedIdx)
10657 Vec = DAG.getInsertSubvector(DL, OrigVec, Vec, *AlignedIdx);
10658 if (!VecVT.isFixedLengthVector())
10659 return Vec;
10660 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
10661 }
10662
10663 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
10664 } else {
10665 // On RV32, i64-element vectors must be specially handled to place the
10666 // value at element 0, by using two vslide1down instructions in sequence on
10667 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
10668 // this.
10669 SDValue ValLo, ValHi;
10670 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
10671 MVT I32ContainerVT =
10672 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
10673 SDValue I32Mask =
10674 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
10675 // Limit the active VL to two.
10676 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
10677 // If the Idx is 0 we can insert directly into the vector.
10678 if (isNullConstant(Idx)) {
10679 // First slide in the lo value, then the hi in above it. We use slide1down
10680 // to avoid the register group overlap constraint of vslide1up.
10681 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10682 Vec, Vec, ValLo, I32Mask, InsertI64VL);
10683 // If the source vector is undef don't pass along the tail elements from
10684 // the previous slide1down.
10685 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
10686 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10687 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
10688 // Bitcast back to the right container type.
10689 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10690
10691 if (AlignedIdx)
10692 ValInVec = DAG.getInsertSubvector(DL, OrigVec, ValInVec, *AlignedIdx);
10693 if (!VecVT.isFixedLengthVector())
10694 return ValInVec;
10695 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
10696 }
10697
10698 // First slide in the lo value, then the hi in above it. We use slide1down
10699 // to avoid the register group overlap constraint of vslide1up.
10700 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10701 DAG.getUNDEF(I32ContainerVT),
10702 DAG.getUNDEF(I32ContainerVT), ValLo,
10703 I32Mask, InsertI64VL);
10704 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
10705 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
10706 I32Mask, InsertI64VL);
10707 // Bitcast back to the right container type.
10708 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
10709 }
10710
10711 // Now that the value is in a vector, slide it into position.
10712 SDValue InsertVL =
10713 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
10714
10715   // Use tail agnostic policy if Idx is the last index of Vec.
10716   unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10717 if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
10718       Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
10719     Policy = RISCVVType::TAIL_AGNOSTIC;
10720 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
10721 Idx, Mask, InsertVL, Policy);
10722
10723 if (AlignedIdx)
10724 Slideup = DAG.getInsertSubvector(DL, OrigVec, Slideup, *AlignedIdx);
10725 if (!VecVT.isFixedLengthVector())
10726 return Slideup;
10727 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
10728}
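
// Illustrative sketch (not part of the LLVM source): the slide-up based insert
// above modelled on a plain array. The value sits at element 0 of a temporary
// vector; vslideup with VL == Idx + 1 writes dest[i] = tmp[i - Idx] only for
// Idx <= i < VL, so exactly one element of the destination changes and the
// tail stays undisturbed.
static void insertViaSlideup(unsigned *Vec, unsigned NumElts, unsigned Idx,
                             unsigned Val) {
  if (Idx >= NumElts)
    return;
  const unsigned TmpElt0 = Val; // vmv.s.x / vfmv.s.f equivalent
  const unsigned VL = Idx + 1;  // limit the active vector length
  for (unsigned I = Idx; I < VL; ++I)
    Vec[I] = TmpElt0;           // runs exactly once, writing element Idx
}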
10729
10730// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
10731// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
10732// types this is done using VMV_X_S to allow us to glean information about the
10733// sign bits of the result.
10734SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
10735 SelectionDAG &DAG) const {
10736 SDLoc DL(Op);
10737 SDValue Idx = Op.getOperand(1);
10738 SDValue Vec = Op.getOperand(0);
10739 EVT EltVT = Op.getValueType();
10740 MVT VecVT = Vec.getSimpleValueType();
10741 MVT XLenVT = Subtarget.getXLenVT();
10742
10743 if (VecVT.getVectorElementType() == MVT::i1) {
10744 // Use vfirst.m to extract the first bit.
10745 if (isNullConstant(Idx)) {
10746 MVT ContainerVT = VecVT;
10747 if (VecVT.isFixedLengthVector()) {
10748 ContainerVT = getContainerForFixedLengthVector(VecVT);
10749 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10750 }
10751 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10752 SDValue Vfirst =
10753 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
10754 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
10755 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10756 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10757 }
10758 if (VecVT.isFixedLengthVector()) {
10759 unsigned NumElts = VecVT.getVectorNumElements();
10760 if (NumElts >= 8) {
10761 MVT WideEltVT;
10762 unsigned WidenVecLen;
10763 SDValue ExtractElementIdx;
10764 SDValue ExtractBitIdx;
10765 unsigned MaxEEW = Subtarget.getELen();
10766 MVT LargestEltVT = MVT::getIntegerVT(
10767 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
10768 if (NumElts <= LargestEltVT.getSizeInBits()) {
10769 assert(isPowerOf2_32(NumElts) &&
10770 "the number of elements should be power of 2");
10771 WideEltVT = MVT::getIntegerVT(NumElts);
10772 WidenVecLen = 1;
10773 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
10774 ExtractBitIdx = Idx;
10775 } else {
10776 WideEltVT = LargestEltVT;
10777 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
10778 // extract element index = index / element width
10779 ExtractElementIdx = DAG.getNode(
10780 ISD::SRL, DL, XLenVT, Idx,
10781 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
10782 // mask bit index = index % element width
10783 ExtractBitIdx = DAG.getNode(
10784 ISD::AND, DL, XLenVT, Idx,
10785 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
10786 }
10787 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
10788 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
10789 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
10790 Vec, ExtractElementIdx);
10791 // Extract the bit from GPR.
10792 SDValue ShiftRight =
10793 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
10794 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
10795 DAG.getConstant(1, DL, XLenVT));
10796 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
10797 }
10798 }
10799 // Otherwise, promote to an i8 vector and extract from that.
10800 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
10801 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
10802 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
10803 }
10804
10805 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
10806 (EltVT == MVT::bf16 && !Subtarget.hasVInstructionsBF16())) {
10807 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
10808 MVT IntVT = VecVT.changeTypeToInteger();
10809 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
10810 SDValue IntExtract =
10811 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
10812 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
10813 }
10814
10815 if (Subtarget.enablePExtCodeGen() && VecVT.isFixedLengthVector()) {
10816 if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
10817 VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
10818 return SDValue();
10819 SDValue Extracted = DAG.getBitcast(XLenVT, Vec);
10820 unsigned ElemWidth = VecVT.getVectorElementType().getSizeInBits();
10821 SDValue Shamt = DAG.getNode(ISD::MUL, DL, XLenVT, Idx,
10822 DAG.getConstant(ElemWidth, DL, XLenVT));
10823 return DAG.getNode(ISD::SRL, DL, XLenVT, Extracted, Shamt);
10824 }
10825
10826 // If this is a fixed vector, we need to convert it to a scalable vector.
10827 MVT ContainerVT = VecVT;
10828 if (VecVT.isFixedLengthVector()) {
10829 ContainerVT = getContainerForFixedLengthVector(VecVT);
10830 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10831 }
10832
10833 // If we're compiling for an exact VLEN value and we have a known
10834 // constant index, we can always perform the extract in m1 (or
10835 // smaller) as we can determine the register corresponding to
10836 // the index in the register group.
10837 const auto VLen = Subtarget.getRealVLen();
10838 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
10839 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
10840 MVT M1VT = RISCVTargetLowering::getM1VT(ContainerVT);
10841 unsigned OrigIdx = IdxC->getZExtValue();
10842 EVT ElemVT = VecVT.getVectorElementType();
10843 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
10844 unsigned RemIdx = OrigIdx % ElemsPerVReg;
10845 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
10846 unsigned ExtractIdx =
10847 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
10848 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, ExtractIdx);
10849 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
10850 ContainerVT = M1VT;
10851 }
10852
10853 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
10854 // contains our index.
10855 std::optional<uint64_t> MaxIdx;
10856 if (VecVT.isFixedLengthVector())
10857 MaxIdx = VecVT.getVectorNumElements() - 1;
10858 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
10859 MaxIdx = IdxC->getZExtValue();
10860 if (MaxIdx) {
10861 if (auto SmallerVT =
10862 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
10863 ContainerVT = *SmallerVT;
10864 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
10865 }
10866 }
10867
10868 // Use ri.vextract.x.v if available.
10869 // TODO: Avoid index 0 and just use the vmv.x.s
10870   if (Subtarget.hasVendorXRivosVisni() && EltVT.isInteger() &&
10871       isValidVisniInsertExtractIndex(Idx)) {
10872 SDValue Elt = DAG.getNode(RISCVISD::RI_VEXTRACT, DL, XLenVT, Vec, Idx);
10873 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt);
10874 }
10875
10876 // If after narrowing, the required slide is still greater than LMUL2,
10877 // fallback to generic expansion and go through the stack. This is done
10878 // for a subtle reason: extracting *all* elements out of a vector is
10879 // widely expected to be linear in vector size, but because vslidedown
10880 // is linear in LMUL, performing N extracts using vslidedown becomes
10881 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
10882 // seems to have the same problem (the store is linear in LMUL), but the
10883 // generic expansion *memoizes* the store, and thus for many extracts of
10884 // the same vector we end up with one store and a bunch of loads.
10885 // TODO: We don't have the same code for insert_vector_elt because we
10886 // have BUILD_VECTOR and handle the degenerate case there. Should we
10887 // consider adding an inverse BUILD_VECTOR node?
10888   MVT LMUL2VT =
10889       RISCVTargetLowering::getM1VT(ContainerVT).getDoubleNumVectorElementsVT();
10890 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
10891 return SDValue();
10892
10893 // If the index is 0, the vector is already in the right position.
10894 if (!isNullConstant(Idx)) {
10895 // Use a VL of 1 to avoid processing more elements than we need.
10896 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
10897 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10898 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
10899 }
10900
10901 if (!EltVT.isInteger()) {
10902 // Floating-point extracts are handled in TableGen.
10903 return DAG.getExtractVectorElt(DL, EltVT, Vec, 0);
10904 }
10905
10906 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
10907 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
10908}
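
// Illustrative sketch (not part of the LLVM source): the exact-VLEN index
// split used by both the insert and extract lowerings above. With VLEN == 128
// and 32-bit elements, each vector register holds 4 elements, so element 9 of
// an LMUL=4 register group is element 1 (RemIdx) of the third register
// (SubRegIdx == 2).
static void splitIndexForExactVLEN(unsigned OrigIdx, unsigned VLenBits,
                                   unsigned EltBits, unsigned &SubRegIdx,
                                   unsigned &RemIdx) {
  const unsigned ElemsPerVReg = VLenBits / EltBits;
  SubRegIdx = OrigIdx / ElemsPerVReg;
  RemIdx = OrigIdx % ElemsPerVReg;
}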
10909
10910// Some RVV intrinsics may claim that they want an integer operand to be
10911// promoted or expanded.
10912 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
10913                                            const RISCVSubtarget &Subtarget) {
10914 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
10915 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
10916 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
10917 "Unexpected opcode");
10918
10919 if (!Subtarget.hasVInstructions())
10920 return SDValue();
10921
10922 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
10923 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
10924 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
10925
10926 SDLoc DL(Op);
10927
10929 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
10930 if (!II || !II->hasScalarOperand())
10931 return SDValue();
10932
10933 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
10934 assert(SplatOp < Op.getNumOperands());
10935
10936 SmallVector<SDValue, 8> Operands(Op->ops());
10937 SDValue &ScalarOp = Operands[SplatOp];
10938 MVT OpVT = ScalarOp.getSimpleValueType();
10939 MVT XLenVT = Subtarget.getXLenVT();
10940
10941 // If this isn't a scalar, or its type is XLenVT we're done.
10942 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
10943 return SDValue();
10944
10945 // Simplest case is that the operand needs to be promoted to XLenVT.
10946 if (OpVT.bitsLT(XLenVT)) {
10947 // If the operand is a constant, sign extend to increase our chances
10948     // of being able to use a .vi instruction. ANY_EXTEND would become a
10949     // zero extend and the simm5 check in isel would fail.
10950 // FIXME: Should we ignore the upper bits in isel instead?
10951     unsigned ExtOpc =
10952         isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
10953 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
10954 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10955 }
10956
10957 // Use the previous operand to get the vXi64 VT. The result might be a mask
10958 // VT for compares. Using the previous operand assumes that the previous
10959 // operand will never have a smaller element size than a scalar operand and
10960 // that a widening operation never uses SEW=64.
10961 // NOTE: If this fails the below assert, we can probably just find the
10962 // element count from any operand or result and use it to construct the VT.
10963 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
10964 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
10965
10966 // The more complex case is when the scalar is larger than XLenVT.
10967 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
10968 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
10969
10970 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
10971 // instruction to sign-extend since SEW>XLEN.
10972 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
10973 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
10974 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
10975 }
10976
10977 switch (IntNo) {
10978 case Intrinsic::riscv_vslide1up:
10979 case Intrinsic::riscv_vslide1down:
10980 case Intrinsic::riscv_vslide1up_mask:
10981 case Intrinsic::riscv_vslide1down_mask: {
10982 // We need to special case these when the scalar is larger than XLen.
10983 unsigned NumOps = Op.getNumOperands();
10984 bool IsMasked = NumOps == 7;
10985
10986 // Convert the vector source to the equivalent nxvXi32 vector.
10987 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
10988 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
10989 SDValue ScalarLo, ScalarHi;
10990 std::tie(ScalarLo, ScalarHi) =
10991 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
10992
10993 // Double the VL since we halved SEW.
10994 SDValue AVL = getVLOperand(Op);
10995 SDValue I32VL;
10996
10997 // Optimize for constant AVL
10998 if (isa<ConstantSDNode>(AVL)) {
10999       const auto [MinVLMAX, MaxVLMAX] =
11000           RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
11001
11002 uint64_t AVLInt = AVL->getAsZExtVal();
11003 if (AVLInt <= MinVLMAX) {
11004 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
11005 } else if (AVLInt >= 2 * MaxVLMAX) {
11006 // Just set vl to VLMAX in this situation
11007 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
11008 } else {
11009 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
11010 // is related to the hardware implementation.
11011         // So let the following code handle it.
11012 }
11013 }
11014     if (!I32VL) {
11015       RISCVVType::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
11016 SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
11017 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
11018 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
11019 SDValue SETVL =
11020 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
11021       // Use a vsetvli instruction to get the actually-used length, which is
11022       // related to the hardware implementation.
11023 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
11024 SEW, LMUL);
11025 I32VL =
11026 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
11027 }
11028
11029 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
11030
11031 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
11032 // instructions.
11033 SDValue Passthru;
11034 if (IsMasked)
11035 Passthru = DAG.getUNDEF(I32VT);
11036 else
11037 Passthru = DAG.getBitcast(I32VT, Operands[1]);
11038
11039 if (IntNo == Intrinsic::riscv_vslide1up ||
11040 IntNo == Intrinsic::riscv_vslide1up_mask) {
11041 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
11042 ScalarHi, I32Mask, I32VL);
11043 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
11044 ScalarLo, I32Mask, I32VL);
11045 } else {
11046 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
11047 ScalarLo, I32Mask, I32VL);
11048 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
11049 ScalarHi, I32Mask, I32VL);
11050 }
11051
11052 // Convert back to nxvXi64.
11053 Vec = DAG.getBitcast(VT, Vec);
11054
11055 if (!IsMasked)
11056 return Vec;
11057 // Apply mask after the operation.
11058 SDValue Mask = Operands[NumOps - 3];
11059 SDValue MaskedOff = Operands[1];
11060 // Assume Policy operand is the last operand.
11061 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
11062 // We don't need to select maskedoff if it's undef.
11063 if (MaskedOff.isUndef())
11064 return Vec;
11065 // TAMU
11066 if (Policy == RISCVVType::TAIL_AGNOSTIC)
11067 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
11068 DAG.getUNDEF(VT), AVL);
11069 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
11070     // It's fine because vmerge does not care about the mask policy.
11071 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
11072 MaskedOff, AVL);
11073 }
11074 }
11075
11076 // We need to convert the scalar to a splat vector.
11077 SDValue VL = getVLOperand(Op);
11078 assert(VL.getValueType() == XLenVT);
11079 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
11080 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
11081}
11082
11083// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
11084// scalable vector llvm.get.vector.length for now.
11085//
11086// We need to convert from a scalable VF to a vsetvli with VLMax equal to
11087// (vscale * VF). The vscale and VF are independent of element width. We use
11088// SEW=8 for the vsetvli because it is the only element width that supports all
11089// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
11090// (vscale * VF). Where vscale is defined as VLEN/RVVBitsPerBlock. The
11091// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
11092// SEW and LMUL are better for the surrounding vector instructions.
11093 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
11094                                     const RISCVSubtarget &Subtarget) {
11095 MVT XLenVT = Subtarget.getXLenVT();
11096
11097 // The smallest LMUL is only valid for the smallest element width.
11098 const unsigned ElementWidth = 8;
11099
11100 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
11101 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
11102 // We don't support VF==1 with ELEN==32.
11103 [[maybe_unused]] unsigned MinVF =
11104 RISCV::RVVBitsPerBlock / Subtarget.getELen();
11105
11106 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
11107 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
11108 "Unexpected VF");
11109
11110 bool Fractional = VF < LMul1VF;
11111 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
11112 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
11113 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
11114
11115 SDLoc DL(N);
11116
11117 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
11118 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
11119
11120 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
11121
11122 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
11123 SDValue Res =
11124 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
11125 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
11126}
11127
11128 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
11129 const RISCVSubtarget &Subtarget) {
11130 SDValue Op0 = N->getOperand(1);
11131 MVT OpVT = Op0.getSimpleValueType();
11132 MVT ContainerVT = OpVT;
11133 if (OpVT.isFixedLengthVector()) {
11134 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
11135 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
11136 }
11137 MVT XLenVT = Subtarget.getXLenVT();
11138 SDLoc DL(N);
11139 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
11140 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
11141 if (isOneConstant(N->getOperand(2)))
11142 return Res;
11143
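// vfirst.m yields the index of the first active mask bit, or -1 when no bit
// is set. When the all-zero result is not poison, llvm.experimental.cttz.elts
// must return the number of elements, hence the select against VL below.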
11144 // Convert -1 to VL.
11145 SDValue Setcc =
11146 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
11147 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
11148 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
11149}
11150
11151static inline void promoteVCIXScalar(SDValue Op,
11152 MutableArrayRef<SDValue> Operands,
11153 SelectionDAG &DAG) {
11154 const RISCVSubtarget &Subtarget =
11155 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
11156
11157 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
11158 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
11159 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
11160 SDLoc DL(Op);
11161
11162 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
11163 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
11164 if (!II || !II->hasScalarOperand())
11165 return;
11166
11167 unsigned SplatOp = II->ScalarOperand + 1;
11168 assert(SplatOp < Op.getNumOperands());
11169
11170 SDValue &ScalarOp = Operands[SplatOp];
11171 MVT OpVT = ScalarOp.getSimpleValueType();
11172 MVT XLenVT = Subtarget.getXLenVT();
11173
11174 // The code below is partially copied from lowerVectorIntrinsicScalars.
11175 // If this isn't a scalar, or its type is XLenVT we're done.
11176 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
11177 return;
11178
11179 // Manually emit promote operation for scalar operation.
11180 if (OpVT.bitsLT(XLenVT)) {
11181 unsigned ExtOpc =
11182 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
11183 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
11184 }
11185}
11186
11187static void processVCIXOperands(SDValue OrigOp,
11188 MutableArrayRef<SDValue> Operands,
11189 SelectionDAG &DAG) {
11190 promoteVCIXScalar(OrigOp, Operands, DAG);
11191 const RISCVSubtarget &Subtarget =
11192 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
11193 for (SDValue &V : Operands) {
11194 EVT ValType = V.getValueType();
11195 if (ValType.isVector() && ValType.isFloatingPoint()) {
11196 MVT InterimIVT =
11197 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
11198 ValType.getVectorElementCount());
11199 V = DAG.getBitcast(InterimIVT, V);
11200 }
11201 if (ValType.isFixedLengthVector()) {
11202 MVT OpContainerVT = getContainerForFixedLengthVector(
11203 DAG, V.getSimpleValueType(), Subtarget);
11204 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
11205 }
11206 }
11207}
11208
11209// LMUL * VLEN should be greater than or equal to EGS * SEW
11210static inline bool isValidEGW(int EGS, EVT VT,
11211 const RISCVSubtarget &Subtarget) {
11212 return (Subtarget.getRealMinVLen() *
11213 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
11214 EGS * VT.getScalarSizeInBits();
11215}
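// For example (illustrative): with RealMinVLen=128, an nxv4i32 operand spans
// 128*128/64 = 256 bits per register group, so EGS=4 at SEW=32 (128 bits) is
// valid, whereas nxv1i32 spans only 128*32/64 = 64 bits and is rejected.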
11216
11217SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11218 SelectionDAG &DAG) const {
11219 unsigned IntNo = Op.getConstantOperandVal(0);
11220 SDLoc DL(Op);
11221 MVT XLenVT = Subtarget.getXLenVT();
11222
11223 switch (IntNo) {
11224 default:
11225 break; // Don't custom lower most intrinsics.
11226 case Intrinsic::riscv_tuple_insert: {
11227 SDValue Vec = Op.getOperand(1);
11228 SDValue SubVec = Op.getOperand(2);
11229 SDValue Index = Op.getOperand(3);
11230
11231 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
11232 SubVec, Index);
11233 }
11234 case Intrinsic::riscv_tuple_extract: {
11235 SDValue Vec = Op.getOperand(1);
11236 SDValue Index = Op.getOperand(2);
11237
11238 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
11239 Index);
11240 }
11241 case Intrinsic::thread_pointer: {
11242 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11243 return DAG.getRegister(RISCV::X4, PtrVT);
11244 }
11245 case Intrinsic::riscv_orc_b:
11246 case Intrinsic::riscv_brev8:
11247 case Intrinsic::riscv_sha256sig0:
11248 case Intrinsic::riscv_sha256sig1:
11249 case Intrinsic::riscv_sha256sum0:
11250 case Intrinsic::riscv_sha256sum1:
11251 case Intrinsic::riscv_sm3p0:
11252 case Intrinsic::riscv_sm3p1: {
11253 unsigned Opc;
11254 switch (IntNo) {
11255 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
11256 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
11257 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
11258 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
11259 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
11260 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
11261 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
11262 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
11263 }
11264
11265 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
11266 }
11267 case Intrinsic::riscv_sm4ks:
11268 case Intrinsic::riscv_sm4ed: {
11269 unsigned Opc =
11270 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
11271
11272 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
11273 Op.getOperand(3));
11274 }
11275 case Intrinsic::riscv_zip:
11276 case Intrinsic::riscv_unzip: {
11277 unsigned Opc =
11278 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
11279 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
11280 }
11281 case Intrinsic::riscv_mopr:
11282 return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
11283 Op.getOperand(2));
11284
11285 case Intrinsic::riscv_moprr: {
11286 return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
11287 Op.getOperand(2), Op.getOperand(3));
11288 }
11289 case Intrinsic::riscv_clmul:
11290 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
11291 Op.getOperand(2));
11292 case Intrinsic::riscv_clmulh:
11293 case Intrinsic::riscv_clmulr: {
11294 unsigned Opc =
11295 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
11296 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
11297 }
11298 case Intrinsic::experimental_get_vector_length:
11299 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
11300 case Intrinsic::experimental_cttz_elts:
11301 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
11302 case Intrinsic::riscv_vmv_x_s: {
11303 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
11304 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
11305 }
11306 case Intrinsic::riscv_vfmv_f_s:
11307 return DAG.getExtractVectorElt(DL, Op.getValueType(), Op.getOperand(1), 0);
11308 case Intrinsic::riscv_vmv_v_x:
11309 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
11310 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
11311 Subtarget);
11312 case Intrinsic::riscv_vfmv_v_f:
11313 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
11314 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
11315 case Intrinsic::riscv_vmv_s_x: {
11316 SDValue Scalar = Op.getOperand(2);
11317
11318 if (Scalar.getValueType().bitsLE(XLenVT)) {
11319 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
11320 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
11321 Op.getOperand(1), Scalar, Op.getOperand(3));
11322 }
11323
11324 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
11325
11326 // This is an i64 value that lives in two scalar registers. We have to
11327 // insert this in a convoluted way. First we build vXi64 splat containing
11328 // the two values that we assemble using some bit math. Next we'll use
11329 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
11330 // to merge element 0 from our splat into the source vector.
11331 // FIXME: This is probably not the best way to do this, but it is
11332 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
11333 // point.
11334 // sw lo, (a0)
11335 // sw hi, 4(a0)
11336 // vlse vX, (a0)
11337 //
11338 // vid.v vVid
11339 // vmseq.vx mMask, vVid, 0
11340 // vmerge.vvm vDest, vSrc, vVal, mMask
11341 MVT VT = Op.getSimpleValueType();
11342 SDValue Vec = Op.getOperand(1);
11343 SDValue VL = getVLOperand(Op);
11344
11345 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
11346 if (Op.getOperand(1).isUndef())
11347 return SplattedVal;
11348 SDValue SplattedIdx =
11349 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11350 DAG.getConstant(0, DL, MVT::i32), VL);
11351
11352 MVT MaskVT = getMaskTypeFor(VT);
11353 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
11354 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11355 SDValue SelectCond =
11356 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11357 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
11358 DAG.getUNDEF(MaskVT), Mask, VL});
11359 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
11360 Vec, DAG.getUNDEF(VT), VL);
11361 }
11362 case Intrinsic::riscv_vfmv_s_f:
11363 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
11364 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
11365 // EGS * EEW >= 128 bits
11366 case Intrinsic::riscv_vaesdf_vv:
11367 case Intrinsic::riscv_vaesdf_vs:
11368 case Intrinsic::riscv_vaesdm_vv:
11369 case Intrinsic::riscv_vaesdm_vs:
11370 case Intrinsic::riscv_vaesef_vv:
11371 case Intrinsic::riscv_vaesef_vs:
11372 case Intrinsic::riscv_vaesem_vv:
11373 case Intrinsic::riscv_vaesem_vs:
11374 case Intrinsic::riscv_vaeskf1:
11375 case Intrinsic::riscv_vaeskf2:
11376 case Intrinsic::riscv_vaesz_vs:
11377 case Intrinsic::riscv_vsm4k:
11378 case Intrinsic::riscv_vsm4r_vv:
11379 case Intrinsic::riscv_vsm4r_vs: {
11380 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
11381 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
11382 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
11383 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
11384 return Op;
11385 }
11386 // EGS * EEW >= 256 bits
11387 case Intrinsic::riscv_vsm3c:
11388 case Intrinsic::riscv_vsm3me: {
11389 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
11390 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
11391 reportFatalUsageError("EGW should be greater than or equal to 8 * SEW.");
11392 return Op;
11393 }
11394 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
11395 case Intrinsic::riscv_vsha2ch:
11396 case Intrinsic::riscv_vsha2cl:
11397 case Intrinsic::riscv_vsha2ms: {
11398 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
11399 !Subtarget.hasStdExtZvknhb())
11400 reportFatalUsageError("SEW=64 needs Zvknhb to be enabled.");
11401 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
11402 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
11403 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
11404 reportFatalUsageError("EGW should be greater than or equal to 4 * SEW.");
11405 return Op;
11406 }
11407 case Intrinsic::riscv_sf_vc_v_x:
11408 case Intrinsic::riscv_sf_vc_v_i:
11409 case Intrinsic::riscv_sf_vc_v_xv:
11410 case Intrinsic::riscv_sf_vc_v_iv:
11411 case Intrinsic::riscv_sf_vc_v_vv:
11412 case Intrinsic::riscv_sf_vc_v_fv:
11413 case Intrinsic::riscv_sf_vc_v_xvv:
11414 case Intrinsic::riscv_sf_vc_v_ivv:
11415 case Intrinsic::riscv_sf_vc_v_vvv:
11416 case Intrinsic::riscv_sf_vc_v_fvv:
11417 case Intrinsic::riscv_sf_vc_v_xvw:
11418 case Intrinsic::riscv_sf_vc_v_ivw:
11419 case Intrinsic::riscv_sf_vc_v_vvw:
11420 case Intrinsic::riscv_sf_vc_v_fvw: {
11421 MVT VT = Op.getSimpleValueType();
11422
11423 SmallVector<SDValue> Operands{Op->op_values()};
11424 processVCIXOperands(Op, Operands, DAG);
11425
11426 MVT RetVT = VT;
11427 if (VT.isFixedLengthVector())
11428 RetVT = getContainerForFixedLengthVector(VT);
11429 else if (VT.isFloatingPoint())
11430 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11431 VT.getVectorElementCount());
11432
11433 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
11434
11435 if (VT.isFixedLengthVector())
11436 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
11437 else if (VT.isFloatingPoint())
11438 NewNode = DAG.getBitcast(VT, NewNode);
11439
11440 if (Op == NewNode)
11441 break;
11442
11443 return NewNode;
11444 }
11445 }
11446
11447 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11448}
11449
11450 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
11451 unsigned Type) {
11452 SDLoc DL(Op);
11453 SmallVector<SDValue> Operands{Op->op_values()};
11454 Operands.erase(Operands.begin() + 1);
11455
11456 const RISCVSubtarget &Subtarget =
11457 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
11458 MVT VT = Op.getSimpleValueType();
11459 MVT RetVT = VT;
11460 MVT FloatVT = VT;
11461
11462 if (VT.isFloatingPoint()) {
11463 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
11464 VT.getVectorElementCount());
11465 FloatVT = RetVT;
11466 }
11467 if (VT.isFixedLengthVector())
11468 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
11469 Subtarget);
11470
11471 processVCIXOperands(Op, Operands, DAG);
11472
11473 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
11474 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
11475 SDValue Chain = NewNode.getValue(1);
11476
11477 if (VT.isFixedLengthVector())
11478 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
11479 if (VT.isFloatingPoint())
11480 NewNode = DAG.getBitcast(VT, NewNode);
11481
11482 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
11483
11484 return NewNode;
11485}
11486
11487 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
11488 unsigned Type) {
11489 SmallVector<SDValue> Operands{Op->op_values()};
11490 Operands.erase(Operands.begin() + 1);
11491 processVCIXOperands(Op, Operands, DAG);
11492
11493 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
11494}
11495
11496static SDValue
11497 lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op,
11498 const RISCVSubtarget &Subtarget,
11499 SelectionDAG &DAG) {
11500 bool IsStrided;
11501 switch (IntNo) {
11502 case Intrinsic::riscv_seg2_load_mask:
11503 case Intrinsic::riscv_seg3_load_mask:
11504 case Intrinsic::riscv_seg4_load_mask:
11505 case Intrinsic::riscv_seg5_load_mask:
11506 case Intrinsic::riscv_seg6_load_mask:
11507 case Intrinsic::riscv_seg7_load_mask:
11508 case Intrinsic::riscv_seg8_load_mask:
11509 IsStrided = false;
11510 break;
11511 case Intrinsic::riscv_sseg2_load_mask:
11512 case Intrinsic::riscv_sseg3_load_mask:
11513 case Intrinsic::riscv_sseg4_load_mask:
11514 case Intrinsic::riscv_sseg5_load_mask:
11515 case Intrinsic::riscv_sseg6_load_mask:
11516 case Intrinsic::riscv_sseg7_load_mask:
11517 case Intrinsic::riscv_sseg8_load_mask:
11518 IsStrided = true;
11519 break;
11520 default:
11521 llvm_unreachable("unexpected intrinsic ID");
11522 };
11523
11524 static const Intrinsic::ID VlsegInts[7] = {
11525 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
11526 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
11527 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
11528 Intrinsic::riscv_vlseg8_mask};
11529 static const Intrinsic::ID VlssegInts[7] = {
11530 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
11531 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
11532 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
11533 Intrinsic::riscv_vlsseg8_mask};
11534
11535 SDLoc DL(Op);
11536 unsigned NF = Op->getNumValues() - 1;
11537 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11538 MVT XLenVT = Subtarget.getXLenVT();
11539 MVT VT = Op->getSimpleValueType(0);
11540 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11541 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11542 ContainerVT.getScalarSizeInBits();
11543 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11544
11545 // Operands: (chain, int_id, pointer, mask, vl) or
11546 // (chain, int_id, pointer, offset, mask, vl)
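// For example (illustrative), riscv_seg2_load_mask carries
// (chain, int_id, pointer, mask, vl) and is rewritten into a riscv_vlseg2_mask
// intrinsic node returning an NF=2 vector tuple; the strided
// riscv_sseg2_load_mask form additionally carries the byte stride, which is
// inserted as an extra operand below.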
11547 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11548 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11549 MVT MaskVT = Mask.getSimpleValueType();
11550 MVT MaskContainerVT =
11551 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11552 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11553
11554 SDValue IntID = DAG.getTargetConstant(
11555 IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT);
11556 auto *Load = cast<MemIntrinsicSDNode>(Op);
11557
11558 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
11559 SmallVector<SDValue> Ops = {
11560 Load->getChain(),
11561 IntID,
11562 DAG.getUNDEF(VecTupTy),
11563 Op.getOperand(2),
11564 Mask,
11565 VL,
11566 DAG.getTargetConstant(
11567 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
11568 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11569 // Insert the stride operand.
11570 if (IsStrided)
11571 Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3));
11572
11573 SDValue Result =
11574 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11575 Load->getMemoryVT(), Load->getMemOperand());
11576 SmallVector<SDValue> Results;
11577 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
11578 SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
11579 Result.getValue(0),
11580 DAG.getTargetConstant(RetIdx, DL, MVT::i32));
11581 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
11582 }
11583 Results.push_back(Result.getValue(1));
11584 return DAG.getMergeValues(Results, DL);
11585}
11586
11587SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
11588 SelectionDAG &DAG) const {
11589 unsigned IntNo = Op.getConstantOperandVal(1);
11590 switch (IntNo) {
11591 default:
11592 break;
11593 case Intrinsic::riscv_seg2_load_mask:
11594 case Intrinsic::riscv_seg3_load_mask:
11595 case Intrinsic::riscv_seg4_load_mask:
11596 case Intrinsic::riscv_seg5_load_mask:
11597 case Intrinsic::riscv_seg6_load_mask:
11598 case Intrinsic::riscv_seg7_load_mask:
11599 case Intrinsic::riscv_seg8_load_mask:
11600 case Intrinsic::riscv_sseg2_load_mask:
11601 case Intrinsic::riscv_sseg3_load_mask:
11602 case Intrinsic::riscv_sseg4_load_mask:
11603 case Intrinsic::riscv_sseg5_load_mask:
11604 case Intrinsic::riscv_sseg6_load_mask:
11605 case Intrinsic::riscv_sseg7_load_mask:
11606 case Intrinsic::riscv_sseg8_load_mask:
11607 return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG);
11608
11609 case Intrinsic::riscv_sf_vc_v_x_se:
11610 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
11611 case Intrinsic::riscv_sf_vc_v_i_se:
11612 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
11613 case Intrinsic::riscv_sf_vc_v_xv_se:
11614 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
11615 case Intrinsic::riscv_sf_vc_v_iv_se:
11616 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
11617 case Intrinsic::riscv_sf_vc_v_vv_se:
11618 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
11619 case Intrinsic::riscv_sf_vc_v_fv_se:
11620 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
11621 case Intrinsic::riscv_sf_vc_v_xvv_se:
11622 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
11623 case Intrinsic::riscv_sf_vc_v_ivv_se:
11624 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
11625 case Intrinsic::riscv_sf_vc_v_vvv_se:
11626 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
11627 case Intrinsic::riscv_sf_vc_v_fvv_se:
11628 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
11629 case Intrinsic::riscv_sf_vc_v_xvw_se:
11630 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
11631 case Intrinsic::riscv_sf_vc_v_ivw_se:
11632 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
11633 case Intrinsic::riscv_sf_vc_v_vvw_se:
11634 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
11635 case Intrinsic::riscv_sf_vc_v_fvw_se:
11636 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
11637 }
11638
11639 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11640}
11641
11642static SDValue
11643 lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op,
11644 const RISCVSubtarget &Subtarget,
11645 SelectionDAG &DAG) {
11646 bool IsStrided;
11647 switch (IntNo) {
11648 case Intrinsic::riscv_seg2_store_mask:
11649 case Intrinsic::riscv_seg3_store_mask:
11650 case Intrinsic::riscv_seg4_store_mask:
11651 case Intrinsic::riscv_seg5_store_mask:
11652 case Intrinsic::riscv_seg6_store_mask:
11653 case Intrinsic::riscv_seg7_store_mask:
11654 case Intrinsic::riscv_seg8_store_mask:
11655 IsStrided = false;
11656 break;
11657 case Intrinsic::riscv_sseg2_store_mask:
11658 case Intrinsic::riscv_sseg3_store_mask:
11659 case Intrinsic::riscv_sseg4_store_mask:
11660 case Intrinsic::riscv_sseg5_store_mask:
11661 case Intrinsic::riscv_sseg6_store_mask:
11662 case Intrinsic::riscv_sseg7_store_mask:
11663 case Intrinsic::riscv_sseg8_store_mask:
11664 IsStrided = true;
11665 break;
11666 default:
11667 llvm_unreachable("unexpected intrinsic ID");
11668 }
11669
11670 SDLoc DL(Op);
11671 static const Intrinsic::ID VssegInts[] = {
11672 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
11673 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
11674 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
11675 Intrinsic::riscv_vsseg8_mask};
11676 static const Intrinsic::ID VsssegInts[] = {
11677 Intrinsic::riscv_vssseg2_mask, Intrinsic::riscv_vssseg3_mask,
11678 Intrinsic::riscv_vssseg4_mask, Intrinsic::riscv_vssseg5_mask,
11679 Intrinsic::riscv_vssseg6_mask, Intrinsic::riscv_vssseg7_mask,
11680 Intrinsic::riscv_vssseg8_mask};
11681
11682 // Operands: (chain, int_id, vec*, ptr, mask, vl) or
11683 // (chain, int_id, vec*, ptr, stride, mask, vl)
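// For example (illustrative), a riscv_seg3_store_mask call has operands
// (chain, int_id, v0, v1, v2, ptr, mask, vl): 8 operands in total, so
// NF = 8 - 5 = 3 below.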
11684 unsigned NF = Op->getNumOperands() - (IsStrided ? 6 : 5);
11685 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
11686 MVT XLenVT = Subtarget.getXLenVT();
11687 MVT VT = Op->getOperand(2).getSimpleValueType();
11688 MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
11689 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
11690 ContainerVT.getScalarSizeInBits();
11691 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
11692
11693 SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
11694 SDValue Mask = Op.getOperand(Op.getNumOperands() - 2);
11695 MVT MaskVT = Mask.getSimpleValueType();
11696 MVT MaskContainerVT =
11697 ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget);
11698 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
11699
11700 SDValue IntID = DAG.getTargetConstant(
11701 IsStrided ? VsssegInts[NF - 2] : VssegInts[NF - 2], DL, XLenVT);
11702 SDValue Ptr = Op->getOperand(NF + 2);
11703
11704 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
11705
11706 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
11707 for (unsigned i = 0; i < NF; i++)
11708 StoredVal = DAG.getNode(
11709 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
11710 convertToScalableVector(ContainerVT, FixedIntrinsic->getOperand(2 + i),
11711 DAG, Subtarget),
11712 DAG.getTargetConstant(i, DL, MVT::i32));
11713
11714 SmallVector<SDValue> Ops = {
11715 FixedIntrinsic->getChain(),
11716 IntID,
11717 StoredVal,
11718 Ptr,
11719 Mask,
11720 VL,
11721 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
11722 // Insert the stride operand.
11723 if (IsStrided)
11724 Ops.insert(std::next(Ops.begin(), 4),
11725 Op.getOperand(Op.getNumOperands() - 3));
11726
11727 return DAG.getMemIntrinsicNode(
11728 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
11729 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
11730}
11731
11732SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11733 SelectionDAG &DAG) const {
11734 unsigned IntNo = Op.getConstantOperandVal(1);
11735 switch (IntNo) {
11736 default:
11737 break;
11738 case Intrinsic::riscv_seg2_store_mask:
11739 case Intrinsic::riscv_seg3_store_mask:
11740 case Intrinsic::riscv_seg4_store_mask:
11741 case Intrinsic::riscv_seg5_store_mask:
11742 case Intrinsic::riscv_seg6_store_mask:
11743 case Intrinsic::riscv_seg7_store_mask:
11744 case Intrinsic::riscv_seg8_store_mask:
11745 case Intrinsic::riscv_sseg2_store_mask:
11746 case Intrinsic::riscv_sseg3_store_mask:
11747 case Intrinsic::riscv_sseg4_store_mask:
11748 case Intrinsic::riscv_sseg5_store_mask:
11749 case Intrinsic::riscv_sseg6_store_mask:
11750 case Intrinsic::riscv_sseg7_store_mask:
11751 case Intrinsic::riscv_sseg8_store_mask:
11752 return lowerFixedVectorSegStoreIntrinsics(IntNo, Op, Subtarget, DAG);
11753
11754 case Intrinsic::riscv_sf_vc_xv_se:
11755 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
11756 case Intrinsic::riscv_sf_vc_iv_se:
11757 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
11758 case Intrinsic::riscv_sf_vc_vv_se:
11759 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
11760 case Intrinsic::riscv_sf_vc_fv_se:
11761 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
11762 case Intrinsic::riscv_sf_vc_xvv_se:
11763 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
11764 case Intrinsic::riscv_sf_vc_ivv_se:
11765 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
11766 case Intrinsic::riscv_sf_vc_vvv_se:
11767 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
11768 case Intrinsic::riscv_sf_vc_fvv_se:
11769 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
11770 case Intrinsic::riscv_sf_vc_xvw_se:
11771 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
11772 case Intrinsic::riscv_sf_vc_ivw_se:
11773 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
11774 case Intrinsic::riscv_sf_vc_vvw_se:
11775 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
11776 case Intrinsic::riscv_sf_vc_fvw_se:
11777 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
11778 }
11779
11780 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
11781}
11782
11783static unsigned getRVVReductionOp(unsigned ISDOpcode) {
11784 switch (ISDOpcode) {
11785 default:
11786 llvm_unreachable("Unhandled reduction");
11787 case ISD::VP_REDUCE_ADD:
11788 case ISD::VECREDUCE_ADD:
11789 return RISCVISD::VECREDUCE_ADD_VL;
11790 case ISD::VP_REDUCE_UMAX:
11791 case ISD::VECREDUCE_UMAX:
11792 return RISCVISD::VECREDUCE_UMAX_VL;
11793 case ISD::VP_REDUCE_SMAX:
11794 case ISD::VECREDUCE_SMAX:
11795 return RISCVISD::VECREDUCE_SMAX_VL;
11796 case ISD::VP_REDUCE_UMIN:
11797 case ISD::VECREDUCE_UMIN:
11798 return RISCVISD::VECREDUCE_UMIN_VL;
11799 case ISD::VP_REDUCE_SMIN:
11800 case ISD::VECREDUCE_SMIN:
11801 return RISCVISD::VECREDUCE_SMIN_VL;
11802 case ISD::VP_REDUCE_AND:
11803 case ISD::VECREDUCE_AND:
11804 return RISCVISD::VECREDUCE_AND_VL;
11805 case ISD::VP_REDUCE_OR:
11806 case ISD::VECREDUCE_OR:
11807 return RISCVISD::VECREDUCE_OR_VL;
11808 case ISD::VP_REDUCE_XOR:
11809 case ISD::VECREDUCE_XOR:
11810 return RISCVISD::VECREDUCE_XOR_VL;
11811 case ISD::VP_REDUCE_FADD:
11812 return RISCVISD::VECREDUCE_FADD_VL;
11813 case ISD::VP_REDUCE_SEQ_FADD:
11814 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
11815 case ISD::VP_REDUCE_FMAX:
11816 case ISD::VP_REDUCE_FMAXIMUM:
11817 return RISCVISD::VECREDUCE_FMAX_VL;
11818 case ISD::VP_REDUCE_FMIN:
11819 case ISD::VP_REDUCE_FMINIMUM:
11820 return RISCVISD::VECREDUCE_FMIN_VL;
11821 }
11822
11823}
11824
11825SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
11826 SelectionDAG &DAG,
11827 bool IsVP) const {
11828 SDLoc DL(Op);
11829 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
11830 MVT VecVT = Vec.getSimpleValueType();
11831 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
11832 Op.getOpcode() == ISD::VECREDUCE_OR ||
11833 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11834 Op.getOpcode() == ISD::VP_REDUCE_AND ||
11835 Op.getOpcode() == ISD::VP_REDUCE_OR ||
11836 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
11837 "Unexpected reduction lowering");
11838
11839 MVT XLenVT = Subtarget.getXLenVT();
11840
11841 MVT ContainerVT = VecVT;
11842 if (VecVT.isFixedLengthVector()) {
11843 ContainerVT = getContainerForFixedLengthVector(VecVT);
11844 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11845 }
11846
11847 SDValue Mask, VL;
11848 if (IsVP) {
11849 Mask = Op.getOperand(2);
11850 VL = Op.getOperand(3);
11851 } else {
11852 std::tie(Mask, VL) =
11853 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11854 }
11855
11856 ISD::CondCode CC;
11857 switch (Op.getOpcode()) {
11858 default:
11859 llvm_unreachable("Unhandled reduction");
11860 case ISD::VECREDUCE_AND:
11861 case ISD::VP_REDUCE_AND: {
11862 // vcpop ~x == 0
11863 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11864 if (IsVP || VecVT.isFixedLengthVector())
11865 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
11866 else
11867 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
11868 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11869 CC = ISD::SETEQ;
11870 break;
11871 }
11872 case ISD::VECREDUCE_OR:
11873 case ISD::VP_REDUCE_OR:
11874 // vcpop x != 0
11875 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11876 CC = ISD::SETNE;
11877 break;
11878 case ISD::VECREDUCE_XOR:
11879 case ISD::VP_REDUCE_XOR: {
11880 // ((vcpop x) & 1) != 0
11881 SDValue One = DAG.getConstant(1, DL, XLenVT);
11882 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
11883 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
11884 CC = ISD::SETNE;
11885 break;
11886 }
11887 }
11888
11889 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11890 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
11891 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
11892
11893 if (!IsVP)
11894 return SetCC;
11895
11896 // Now include the start value in the operation.
11897 // Note that we must return the start value when no elements are operated
11898 // upon. The vcpop instructions we've emitted in each case above will return
11899 // 0 for an inactive vector, and so we've already received the neutral value:
11900 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
11901 // can simply include the start value.
11902 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11903 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
11904}
11905
11906static bool isNonZeroAVL(SDValue AVL) {
11907 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
11908 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
11909 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
11910 (ImmAVL && ImmAVL->getZExtValue() >= 1);
11911}
11912
11913/// Helper to lower a reduction sequence of the form:
11914/// scalar = reduce_op vec, scalar_start
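/// For an integer add reduction this typically expands to (illustrative only):
///   vmv.s.x    vstart, scalar_start   ; seed element 0 with the start value
///   vredsum.vs vres, vec, vstart      ; LMUL1 reduction with the chosen opcode
///   vmv.x.s    scalar, vres           ; read back element 0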
11915static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
11916 SDValue StartValue, SDValue Vec, SDValue Mask,
11917 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
11918 const RISCVSubtarget &Subtarget) {
11919 const MVT VecVT = Vec.getSimpleValueType();
11920 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
11921 const MVT XLenVT = Subtarget.getXLenVT();
11922 const bool NonZeroAVL = isNonZeroAVL(VL);
11923
11924 // The reduction needs an LMUL1 input; do the splat at either LMUL1
11925 // or the original VT if fractional.
11926 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
11927 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
11928 // prove it is non-zero. For the AVL=0 case, we need the scalar to
11929 // be the result of the reduction operation.
11930 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
11931 SDValue InitialValue =
11932 lowerScalarInsert(StartValue, InnerVL, InnerVT, DL, DAG, Subtarget);
11933 if (M1VT != InnerVT)
11934 InitialValue =
11935 DAG.getInsertSubvector(DL, DAG.getUNDEF(M1VT), InitialValue, 0);
11936 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
11937 SDValue Policy = DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
11938 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
11939 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
11940 return DAG.getExtractVectorElt(DL, ResVT, Reduction, 0);
11941}
11942
11943SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
11944 SelectionDAG &DAG) const {
11945 SDLoc DL(Op);
11946 SDValue Vec = Op.getOperand(0);
11947 EVT VecEVT = Vec.getValueType();
11948
11949 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
11950
11951 // Due to ordering in legalize types we may have a vector type that needs to
11952 // be split. Do that manually so we can get down to a legal type.
11953 while (getTypeAction(*DAG.getContext(), VecEVT) ==
11954 TargetLowering::TypeSplitVector) {
11955 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11956 VecEVT = Lo.getValueType();
11957 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
11958 }
11959
11960 // TODO: The type may need to be widened rather than split. Or widened before
11961 // it can be split.
11962 if (!isTypeLegal(VecEVT))
11963 return SDValue();
11964
11965 MVT VecVT = VecEVT.getSimpleVT();
11966 MVT VecEltVT = VecVT.getVectorElementType();
11967 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
11968
11969 MVT ContainerVT = VecVT;
11970 if (VecVT.isFixedLengthVector()) {
11971 ContainerVT = getContainerForFixedLengthVector(VecVT);
11972 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11973 }
11974
11975 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11976
11977 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
11978 switch (BaseOpc) {
11979 case ISD::AND:
11980 case ISD::OR:
11981 case ISD::UMAX:
11982 case ISD::UMIN:
11983 case ISD::SMAX:
11984 case ISD::SMIN:
11985 StartV = DAG.getExtractVectorElt(DL, VecEltVT, Vec, 0);
11986 }
11987 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
11988 Mask, VL, DL, DAG, Subtarget);
11989}
11990
11991// Given a reduction op, this function returns the matching reduction opcode,
11992// the vector SDValue and the scalar SDValue required to lower this to a
11993// RISCVISD node.
11994static std::tuple<unsigned, SDValue, SDValue>
11995 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT,
11996 const RISCVSubtarget &Subtarget) {
11997 SDLoc DL(Op);
11998 auto Flags = Op->getFlags();
11999 unsigned Opcode = Op.getOpcode();
12000 switch (Opcode) {
12001 default:
12002 llvm_unreachable("Unhandled reduction");
12003 case ISD::VECREDUCE_FADD: {
12004 // Use positive zero if we can. It is cheaper to materialize.
12005 SDValue Zero =
12006 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
12007 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
12008 }
12009 case ISD::VECREDUCE_SEQ_FADD:
12010 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
12011 Op.getOperand(0));
12012 case ISD::VECREDUCE_FMINIMUM:
12013 case ISD::VECREDUCE_FMAXIMUM:
12014 case ISD::VECREDUCE_FMIN:
12015 case ISD::VECREDUCE_FMAX: {
12016 SDValue Front = DAG.getExtractVectorElt(DL, EltVT, Op.getOperand(0), 0);
12017 unsigned RVVOpc =
12018 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
12019 ? RISCVISD::VECREDUCE_FMIN_VL
12020 : RISCVISD::VECREDUCE_FMAX_VL;
12021 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
12022 }
12023 }
12024}
12025
12026SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
12027 SelectionDAG &DAG) const {
12028 SDLoc DL(Op);
12029 MVT VecEltVT = Op.getSimpleValueType();
12030
12031 unsigned RVVOpcode;
12032 SDValue VectorVal, ScalarVal;
12033 std::tie(RVVOpcode, VectorVal, ScalarVal) =
12034 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
12035 MVT VecVT = VectorVal.getSimpleValueType();
12036
12037 MVT ContainerVT = VecVT;
12038 if (VecVT.isFixedLengthVector()) {
12039 ContainerVT = getContainerForFixedLengthVector(VecVT);
12040 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
12041 }
12042
12043 MVT ResVT = Op.getSimpleValueType();
12044 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12045 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
12046 VL, DL, DAG, Subtarget);
12047 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
12048 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
12049 return Res;
12050
12051 if (Op->getFlags().hasNoNaNs())
12052 return Res;
12053
12054 // Force output to NaN if any element is NaN.
12055 SDValue IsNan =
12056 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
12057 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
12058 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
12059 MVT XLenVT = Subtarget.getXLenVT();
12060 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
12061 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
12062 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
12063 return DAG.getSelect(
12064 DL, ResVT, NoNaNs, Res,
12065 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
12066}
12067
12068SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
12069 SelectionDAG &DAG) const {
12070 SDLoc DL(Op);
12071 unsigned Opc = Op.getOpcode();
12072 SDValue Start = Op.getOperand(0);
12073 SDValue Vec = Op.getOperand(1);
12074 EVT VecEVT = Vec.getValueType();
12075 MVT XLenVT = Subtarget.getXLenVT();
12076
12077 // TODO: The type may need to be widened rather than split. Or widened before
12078 // it can be split.
12079 if (!isTypeLegal(VecEVT))
12080 return SDValue();
12081
12082 MVT VecVT = VecEVT.getSimpleVT();
12083 unsigned RVVOpcode = getRVVReductionOp(Opc);
12084
12085 if (VecVT.isFixedLengthVector()) {
12086 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
12087 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12088 }
12089
12090 SDValue VL = Op.getOperand(3);
12091 SDValue Mask = Op.getOperand(2);
12092 SDValue Res =
12093 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
12094 Vec, Mask, VL, DL, DAG, Subtarget);
12095 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
12096 Op->getFlags().hasNoNaNs())
12097 return Res;
12098
12099 // Propagate NaNs.
12100 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
12101 // Check if any of the elements in Vec is NaN.
12102 SDValue IsNaN = DAG.getNode(
12103 RISCVISD::SETCC_VL, DL, PredVT,
12104 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
12105 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
12106 // Check if the start value is NaN.
12107 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
12108 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
12109 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
12110 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
12111 MVT ResVT = Res.getSimpleValueType();
12112 return DAG.getSelect(
12113 DL, ResVT, NoNaNs, Res,
12114 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
12115}
12116
12117SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
12118 SelectionDAG &DAG) const {
12119 SDValue Vec = Op.getOperand(0);
12120 SDValue SubVec = Op.getOperand(1);
12121 MVT VecVT = Vec.getSimpleValueType();
12122 MVT SubVecVT = SubVec.getSimpleValueType();
12123
12124 SDLoc DL(Op);
12125 MVT XLenVT = Subtarget.getXLenVT();
12126 unsigned OrigIdx = Op.getConstantOperandVal(2);
12127 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
12128
12129 if (OrigIdx == 0 && Vec.isUndef())
12130 return Op;
12131
12132 // We don't have the ability to slide mask vectors up indexed by their i1
12133 // elements; the smallest we can do is i8. Often we are able to bitcast to
12134 // equivalent i8 vectors. Note that when inserting a fixed-length vector
12135 // into a scalable one, we might not necessarily have enough scalable
12136 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
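  // For example (illustrative), inserting nxv8i1 into nxv32i1 at index 8 can
  // be re-expressed as inserting nxv1i8 into nxv4i8 at index 1, while
  // inserting v4i1 into nxv1i1 must take the widen-to-i8 path below.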
12137 if (SubVecVT.getVectorElementType() == MVT::i1) {
12138 if (VecVT.getVectorMinNumElements() >= 8 &&
12139 SubVecVT.getVectorMinNumElements() >= 8) {
12140 assert(OrigIdx % 8 == 0 && "Invalid index");
12141 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
12142 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
12143 "Unexpected mask vector lowering");
12144 OrigIdx /= 8;
12145 SubVecVT =
12146 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
12147 SubVecVT.isScalableVector());
12148 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
12149 VecVT.isScalableVector());
12150 Vec = DAG.getBitcast(VecVT, Vec);
12151 SubVec = DAG.getBitcast(SubVecVT, SubVec);
12152 } else {
12153 // We can't slide this mask vector up indexed by its i1 elements.
12154 // This poses a problem when we wish to insert a scalable vector which
12155 // can't be re-expressed as a larger type. Just choose the slow path and
12156 // extend to a larger type, then truncate back down.
12157 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
12158 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
12159 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
12160 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
12161 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
12162 Op.getOperand(2));
12163 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
12164 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
12165 }
12166 }
12167
12168 // If the subvector is a fixed-length type and we don't know VLEN
12169 // exactly, we cannot use subregister manipulation to simplify the codegen; we
12170 // don't know which register of a LMUL group contains the specific subvector
12171 // as we only know the minimum register size. Therefore we must slide the
12172 // vector group up the full amount.
12173 const auto VLen = Subtarget.getRealVLen();
12174 if (SubVecVT.isFixedLengthVector() && !VLen) {
12175 MVT ContainerVT = VecVT;
12176 if (VecVT.isFixedLengthVector()) {
12177 ContainerVT = getContainerForFixedLengthVector(VecVT);
12178 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12179 }
12180
12181 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), SubVec, 0);
12182
12183 SDValue Mask =
12184 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12185 // Set the vector length to only the number of elements we care about. Note
12186 // that for slideup this includes the offset.
12187 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
12188 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
12189
12190 // Use tail agnostic policy if we're inserting over Vec's tail.
12191 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
12192 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
12193 Policy = RISCVVType::TAIL_AGNOSTIC;
12194
12195 // If we're inserting into the lowest elements, use a tail undisturbed
12196 // vmv.v.v.
12197 if (OrigIdx == 0) {
12198 SubVec =
12199 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
12200 } else {
12201 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12202 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
12203 SlideupAmt, Mask, VL, Policy);
12204 }
12205
12206 if (VecVT.isFixedLengthVector())
12207 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
12208 return DAG.getBitcast(Op.getValueType(), SubVec);
12209 }
12210
12211 MVT ContainerVecVT = VecVT;
12212 if (VecVT.isFixedLengthVector()) {
12213 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
12214 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
12215 }
12216
12217 MVT ContainerSubVecVT = SubVecVT;
12218 if (SubVecVT.isFixedLengthVector()) {
12219 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12220 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
12221 }
12222
12223 unsigned SubRegIdx;
12224 ElementCount RemIdx;
12225 // insert_subvector scales the index by vscale if the subvector is scalable,
12226 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12227 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12228 if (SubVecVT.isFixedLengthVector()) {
12229 assert(VLen);
12230 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12231 auto Decompose =
12232 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12233 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12234 SubRegIdx = Decompose.first;
12235 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12236 (OrigIdx % Vscale));
12237 } else {
12238 auto Decompose =
12239 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12240 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
12241 SubRegIdx = Decompose.first;
12242 RemIdx = ElementCount::getScalable(Decompose.second);
12243 }
12244
12245 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
12246 assert(isPowerOf2_64(
12247 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
12248 bool ExactlyVecRegSized =
12249 Subtarget.expandVScale(SubVecVT.getSizeInBits())
12250 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
12251
12252 // 1. If the Idx has been completely eliminated and this subvector's size is
12253 // a vector register or a multiple thereof, or the surrounding elements are
12254 // undef, then this is a subvector insert which naturally aligns to a vector
12255 // register. These can easily be handled using subregister manipulation.
12256 // 2. If the subvector isn't an exact multiple of a valid register group size,
12257 // then the insertion must preserve the undisturbed elements of the register.
12258 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
12259 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
12260 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
12261 // of that LMUL=1 type back into the larger vector (resolving to another
12262 // subregister operation). See below for how our VSLIDEUP works. We go via a
12263 // LMUL=1 type to avoid allocating a large register group to hold our
12264 // subvector.
12265 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
12266 if (SubVecVT.isFixedLengthVector()) {
12267 // We may get NoSubRegister if inserting at index 0 and the subvec
12268 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
12269 if (SubRegIdx == RISCV::NoSubRegister) {
12270 assert(OrigIdx == 0);
12271 return Op;
12272 }
12273
12274 // Use a insert_subvector that will resolve to an insert subreg.
12275 assert(VLen);
12276 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12277 SDValue Insert =
12278 DAG.getInsertSubvector(DL, Vec, SubVec, OrigIdx / Vscale);
12279 if (VecVT.isFixedLengthVector())
12280 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
12281 return Insert;
12282 }
12283 return Op;
12284 }
12285
12286 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
12287 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
12288 // (in our case undisturbed). This means we can set up a subvector insertion
12289 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
12290 // size of the subvector.
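  // For example (illustrative), inserting a 2-element subvector at element 4
  // uses OFFSET=4 and VL=6, leaving elements 0..3 and everything from element
  // 6 onwards undisturbed.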
12291 MVT InterSubVT = ContainerVecVT;
12292 SDValue AlignedExtract = Vec;
12293 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
12294 if (SubVecVT.isFixedLengthVector()) {
12295 assert(VLen);
12296 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
12297 }
12298 if (ContainerVecVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVecVT))) {
12299 InterSubVT = RISCVTargetLowering::getM1VT(ContainerVecVT);
12300 // Extract a subvector equal to the nearest full vector register type. This
12301 // should resolve to a EXTRACT_SUBREG instruction.
12302 AlignedExtract = DAG.getExtractSubvector(DL, InterSubVT, Vec, AlignedIdx);
12303 }
12304
12305 SubVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(InterSubVT), SubVec, 0);
12306
12307 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
12308
12309 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
12310 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
12311
12312 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
12313 unsigned Policy = RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED;
12314 if (Subtarget.expandVScale(EndIndex) ==
12315 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
12316 Policy = RISCVVType::TAIL_AGNOSTIC;
12317
12318 // If we're inserting into the lowest elements, use a tail undisturbed
12319 // vmv.v.v.
12320 if (RemIdx.isZero()) {
12321 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
12322 SubVec, VL);
12323 } else {
12324 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12325
12326 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
12327 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
12328
12329 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
12330 SlideupAmt, Mask, VL, Policy);
12331 }
12332
12333 // If required, insert this subvector back into the correct vector register.
12334 // This should resolve to an INSERT_SUBREG instruction.
12335 if (ContainerVecVT.bitsGT(InterSubVT))
12336 SubVec = DAG.getInsertSubvector(DL, Vec, SubVec, AlignedIdx);
12337
12338 if (VecVT.isFixedLengthVector())
12339 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
12340
12341 // We might have bitcast from a mask type: cast back to the original type if
12342 // required.
12343 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
12344}
12345
12346SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
12347 SelectionDAG &DAG) const {
12348 SDValue Vec = Op.getOperand(0);
12349 MVT SubVecVT = Op.getSimpleValueType();
12350 MVT VecVT = Vec.getSimpleValueType();
12351
12352 SDLoc DL(Op);
12353 MVT XLenVT = Subtarget.getXLenVT();
12354 unsigned OrigIdx = Op.getConstantOperandVal(1);
12355 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
12356
12357 // With an index of 0 this is a cast-like subvector, which can be performed
12358 // with subregister operations.
12359 if (OrigIdx == 0)
12360 return Op;
12361
12362 // We don't have the ability to slide mask vectors down indexed by their i1
12363 // elements; the smallest we can do is i8. Often we are able to bitcast to
12364 // equivalent i8 vectors. Note that when extracting a fixed-length vector
12365 // from a scalable one, we might not necessarily have enough scalable
12366 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
12367 if (SubVecVT.getVectorElementType() == MVT::i1) {
12368 if (VecVT.getVectorMinNumElements() >= 8 &&
12369 SubVecVT.getVectorMinNumElements() >= 8) {
12370 assert(OrigIdx % 8 == 0 && "Invalid index");
12371 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
12372 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
12373 "Unexpected mask vector lowering");
12374 OrigIdx /= 8;
12375 SubVecVT =
12376 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
12377 SubVecVT.isScalableVector());
12378 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
12379 VecVT.isScalableVector());
12380 Vec = DAG.getBitcast(VecVT, Vec);
12381 } else {
12382 // We can't slide this mask vector down, indexed by its i1 elements.
12383 // This poses a problem when we wish to extract a scalable vector which
12384 // can't be re-expressed as a larger type. Just choose the slow path and
12385 // extend to a larger type, then truncate back down.
12386 // TODO: We could probably improve this when extracting certain fixed
12387 // from fixed, where we can extract as i8 and shift the correct element
12388 // right to reach the desired subvector?
12389 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
12390 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
12391 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
12392 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
12393 Op.getOperand(1));
12394 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
12395 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
12396 }
12397 }
12398
12399 const auto VLen = Subtarget.getRealVLen();
12400
12401 // If the subvector is a fixed-length type and we don't know VLEN
12402 // exactly, we cannot use subregister manipulation to simplify the codegen; we
12403 // don't know which register of a LMUL group contains the specific subvector
12404 // as we only know the minimum register size. Therefore we must slide the
12405 // vector group down the full amount.
12406 if (SubVecVT.isFixedLengthVector() && !VLen) {
12407 MVT ContainerVT = VecVT;
12408 if (VecVT.isFixedLengthVector()) {
12409 ContainerVT = getContainerForFixedLengthVector(VecVT);
12410 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12411 }
12412
12413 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
12414 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
12415 if (auto ShrunkVT =
12416 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
12417 ContainerVT = *ShrunkVT;
12418 Vec = DAG.getExtractSubvector(DL, ContainerVT, Vec, 0);
12419 }
12420
12421 SDValue Mask =
12422 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
12423 // Set the vector length to only the number of elements we care about. This
12424 // avoids sliding down elements we're going to discard straight away.
12425 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12426 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
12427 SDValue Slidedown =
12428 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
12429 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
12430 // Now we can use a cast-like subvector extract to get the result.
12431 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12432 return DAG.getBitcast(Op.getValueType(), Slidedown);
12433 }
12434
12435 if (VecVT.isFixedLengthVector()) {
12436 VecVT = getContainerForFixedLengthVector(VecVT);
12437 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
12438 }
12439
12440 MVT ContainerSubVecVT = SubVecVT;
12441 if (SubVecVT.isFixedLengthVector())
12442 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
12443
12444 unsigned SubRegIdx;
12445 ElementCount RemIdx;
12446 // extract_subvector scales the index by vscale if the subvector is scalable,
12447 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
12448 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
12449 if (SubVecVT.isFixedLengthVector()) {
12450 assert(VLen);
12451 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12452 auto Decompose =
12453 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12454 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
12455 SubRegIdx = Decompose.first;
12456 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
12457 (OrigIdx % Vscale));
12458 } else {
12459 auto Decompose =
12460 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
12461 VecVT, ContainerSubVecVT, OrigIdx, TRI);
12462 SubRegIdx = Decompose.first;
12463 RemIdx = ElementCount::getScalable(Decompose.second);
12464 }
12465
12466 // If the Idx has been completely eliminated then this is a subvector extract
12467 // which naturally aligns to a vector register. These can easily be handled
12468 // using subregister manipulation. We use an extract_subvector that will
12469 // resolve to an extract subreg.
12470 if (RemIdx.isZero()) {
12471 if (SubVecVT.isFixedLengthVector()) {
12472 assert(VLen);
12473 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
12474 Vec =
12475 DAG.getExtractSubvector(DL, ContainerSubVecVT, Vec, OrigIdx / Vscale);
12476 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
12477 }
12478 return Op;
12479 }
12480
12481 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
12482 // was > M1 then the index would need to be a multiple of VLMAX, and so would
12483 // divide exactly.
12484 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
12485 getLMUL(ContainerSubVecVT) == RISCVVType::LMUL_1);
12486
12487 // If the vector type is an LMUL-group type, extract a subvector equal to the
12488 // nearest full vector register type.
12489 MVT InterSubVT = VecVT;
12490 if (VecVT.bitsGT(RISCVTargetLowering::getM1VT(VecVT))) {
12491 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
12492 // we should have successfully decomposed the extract into a subregister.
12493 // We use an extract_subvector that will resolve to a subreg extract.
12494 assert(SubRegIdx != RISCV::NoSubRegister);
12495 (void)SubRegIdx;
12496 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
12497 if (SubVecVT.isFixedLengthVector()) {
12498 assert(VLen);
12499 Idx /= *VLen / RISCV::RVVBitsPerBlock;
12500 }
12501 InterSubVT = RISCVTargetLowering::getM1VT(VecVT);
12502 Vec = DAG.getExtractSubvector(DL, InterSubVT, Vec, Idx);
12503 }
12504
12505 // Slide this vector register down by the desired number of elements in order
12506 // to place the desired subvector starting at element 0.
12507 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
12508 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
12509 if (SubVecVT.isFixedLengthVector())
12510 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
12511 SDValue Slidedown =
12512 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
12513 Vec, SlidedownAmt, Mask, VL);
12514
12515 // Now the vector is in the right position, extract our final subvector. This
12516 // should resolve to a COPY.
12517 Slidedown = DAG.getExtractSubvector(DL, SubVecVT, Slidedown, 0);
12518
12519 // We might have bitcast from a mask type: cast back to the original type if
12520 // required.
12521 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
12522}
12523
12524// Widen a vector's operands to i8, then truncate its results back to the
12525 // original type, typically i1. All operand and result types must be the same.
12526 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
12527 SelectionDAG &DAG) {
12528 MVT VT = N.getSimpleValueType();
12529 MVT WideVT = VT.changeVectorElementType(MVT::i8);
12530 SmallVector<SDValue, 4> WideOps;
12531 for (SDValue Op : N->ops()) {
12532 assert(Op.getSimpleValueType() == VT &&
12533 "Operands and result must be same type");
12534 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
12535 }
12536
12537 unsigned NumVals = N->getNumValues();
12538
12539 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
12540 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
12541 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
12542 SmallVector<SDValue, 4> TruncVals;
12543 for (unsigned I = 0; I < NumVals; I++) {
12544 TruncVals.push_back(
12545 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
12546 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
12547 }
12548
12549 if (TruncVals.size() > 1)
12550 return DAG.getMergeValues(TruncVals, DL);
12551 return TruncVals.front();
12552}
12553
12554SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
12555 SelectionDAG &DAG) const {
12556 SDLoc DL(Op);
12557 MVT VecVT = Op.getSimpleValueType();
12558
12559 const unsigned Factor = Op->getNumValues();
12560 assert(Factor <= 8);
12561
12562 // 1 bit element vectors need to be widened to e8
12563 if (VecVT.getVectorElementType() == MVT::i1)
12564 return widenVectorOpsToi8(Op, DL, DAG);
12565
12566 // Convert to scalable vectors first.
12567 if (VecVT.isFixedLengthVector()) {
12568 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12569 SmallVector<SDValue, 8> Ops(Factor);
12570 for (unsigned i = 0U; i < Factor; ++i)
12571 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12572 Subtarget);
12573
12574 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12575 SDValue NewDeinterleave =
12576 DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, DAG.getVTList(VTs), Ops);
12577
12578 SmallVector<SDValue, 8> Res(Factor);
12579 for (unsigned i = 0U; i < Factor; ++i)
12580 Res[i] = convertFromScalableVector(VecVT, NewDeinterleave.getValue(i),
12581 DAG, Subtarget);
12582 return DAG.getMergeValues(Res, DL);
12583 }
12584
12585 // If concatenating would exceed LMUL=8, we need to split.
12586 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12587 (8 * RISCV::RVVBitsPerBlock)) {
12588 SmallVector<SDValue, 8> Ops(Factor * 2);
12589 for (unsigned i = 0; i != Factor; ++i) {
12590 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12591 Ops[i * 2] = OpLo;
12592 Ops[i * 2 + 1] = OpHi;
12593 }
12594
12595 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12596
12597 SDValue Lo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
12598 ArrayRef(Ops).slice(0, Factor));
12599 SDValue Hi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
12600 ArrayRef(Ops).slice(Factor, Factor));
12601
12602 SmallVector<SDValue, 8> Res(Factor);
12603 for (unsigned i = 0; i != Factor; ++i)
12604 Res[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo.getValue(i),
12605 Hi.getValue(i));
12606
12607 return DAG.getMergeValues(Res, DL);
12608 }
12609
12610 if (Subtarget.hasVendorXRivosVizip() && Factor == 2) {
12611 MVT VT = Op->getSimpleValueType(0);
12612 SDValue V1 = Op->getOperand(0);
12613 SDValue V2 = Op->getOperand(1);
12614
12615 // For fractional LMUL, check if we can use a higher LMUL
12616 // instruction to avoid a vslidedown.
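// (For instance, if V1 and V2 are the low and high halves of one wider source
// that still fits within a single register, we can unzip that source at the
// larger type directly instead of materialising the high half separately.)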
12617 if (SDValue Src = foldConcatVector(V1, V2);
12618 Src && RISCVTargetLowering::getM1VT(VT).bitsGT(VT)) {
12619 EVT NewVT = VT.getDoubleNumVectorElementsVT();
12620 Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
12621 // Freeze the source so we can increase its use count.
12622 Src = DAG.getFreeze(Src);
12623 SDValue Even = lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, Src,
12624 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12625 SDValue Odd = lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, Src,
12626 DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
12627 Even = DAG.getExtractSubvector(DL, VT, Even, 0);
12628 Odd = DAG.getExtractSubvector(DL, VT, Odd, 0);
12629 return DAG.getMergeValues({Even, Odd}, DL);
12630 }
12631
12632 // Freeze the sources so we can increase their use count.
12633 V1 = DAG.getFreeze(V1);
12634 V2 = DAG.getFreeze(V2);
12635 SDValue Even =
12636 lowerVZIP(RISCVISD::RI_VUNZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12637 SDValue Odd =
12638 lowerVZIP(RISCVISD::RI_VUNZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12639 return DAG.getMergeValues({Even, Odd}, DL);
12640 }
12641
12642 SmallVector<SDValue, 8> Ops(Op->op_values());
12643
12644 // Concatenate the vectors as one vector to deinterleave
12645 MVT ConcatVT =
12646 MVT::getVectorVT(VecVT.getVectorElementType(),
12647 VecVT.getVectorElementCount().multiplyCoefficientBy(
12648 PowerOf2Ceil(Factor)));
12649 if (Ops.size() < PowerOf2Ceil(Factor))
12650 Ops.append(PowerOf2Ceil(Factor) - Factor, DAG.getUNDEF(VecVT));
12651 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, Ops);
12652
12653 if (Factor == 2) {
12654 // We can deinterleave through vnsrl.wi if the element type is smaller than
12655 // ELEN
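// (Roughly: the concatenated pair is viewed at twice the element width, then
// shifts of 0 and SEW followed by a truncate - i.e. vnsrl.wi - pick out the
// even and odd elements respectively.)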
12656 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12657 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
12658 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
12659 return DAG.getMergeValues({Even, Odd}, DL);
12660 }
12661
12662 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
12663 // possible mask vector, then extract the required subvector. Doing this
12664 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
12665 // creation to be rematerialized during register allocation to reduce
12666 // register pressure if needed.
12667
12668 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
12669
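// Each i8 lane of the splat supplies 8 mask bits, so 0b01010101 selects the
// even element positions and 0b10101010 the odd ones once the splat is
// reinterpreted as a mask.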
12670 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
12671 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
12672 SDValue EvenMask = DAG.getExtractSubvector(DL, MaskVT, EvenSplat, 0);
12673
12674 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
12675 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
12676 SDValue OddMask = DAG.getExtractSubvector(DL, MaskVT, OddSplat, 0);
12677
12678 // vcompress the even and odd elements into two separate vectors
12679 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12680 EvenMask, DAG.getUNDEF(ConcatVT));
12681 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
12682 OddMask, DAG.getUNDEF(ConcatVT));
12683
12684 // Extract the result half of the gather for even and odd
12685 SDValue Even = DAG.getExtractSubvector(DL, VecVT, EvenWide, 0);
12686 SDValue Odd = DAG.getExtractSubvector(DL, VecVT, OddWide, 0);
12687
12688 return DAG.getMergeValues({Even, Odd}, DL);
12689 }
12690
12691 // Store with unit-stride store and load it back with segmented load.
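// The concatenated value is written out contiguously with a plain vse, and a
// vlseg<Factor> load then gathers every Factor'th element into its own result
// register, which is exactly the deinterleave we need.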
12692 MVT XLenVT = Subtarget.getXLenVT();
12693 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12694 SDValue Passthru = DAG.getUNDEF(ConcatVT);
12695
12696 // Allocate a stack slot.
12697 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12698 SDValue StackPtr =
12699 DAG.CreateStackTemporary(ConcatVT.getStoreSize(), Alignment);
12700 auto &MF = DAG.getMachineFunction();
12701 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12702 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12703
12704 SDValue StoreOps[] = {DAG.getEntryNode(),
12705 DAG.getTargetConstant(Intrinsic::riscv_vse, DL, XLenVT),
12706 Concat, StackPtr, VL};
12707
12708 SDValue Chain = DAG.getMemIntrinsicNode(
12709 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), StoreOps,
12710 ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12711 MachineMemOperand::MOStore);
12712
12713 static const Intrinsic::ID VlsegIntrinsicsIds[] = {
12714 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
12715 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
12716 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
12717 Intrinsic::riscv_vlseg8_mask};
12718
12719 SDValue LoadOps[] = {
12720 Chain,
12721 DAG.getTargetConstant(VlsegIntrinsicsIds[Factor - 2], DL, XLenVT),
12722 Passthru,
12723 StackPtr,
12724 Mask,
12725 VL,
12726 DAG.getTargetConstant(
12727 RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT),
12728 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()), DL, XLenVT)};
12729
12730 unsigned Sz =
12731 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12732 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12733
12734 SDValue Load = DAG.getMemIntrinsicNode(
12735 ISD::INTRINSIC_W_CHAIN, DL, DAG.getVTList({VecTupTy, MVT::Other}),
12736 LoadOps, ConcatVT.getVectorElementType(), PtrInfo, Alignment,
12737 MachineMemOperand::MOLoad);
12738
12739 SmallVector<SDValue, 8> Res(Factor);
12740
12741 for (unsigned i = 0U; i < Factor; ++i)
12742 Res[i] = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, VecVT, Load,
12743 DAG.getTargetConstant(i, DL, MVT::i32));
12744
12745 return DAG.getMergeValues(Res, DL);
12746}
12747
12748SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
12749 SelectionDAG &DAG) const {
12750 SDLoc DL(Op);
12751 MVT VecVT = Op.getSimpleValueType();
12752
12753 const unsigned Factor = Op.getNumOperands();
12754 assert(Factor <= 8);
12755
12756 // i1 vectors need to be widened to i8
12757 if (VecVT.getVectorElementType() == MVT::i1)
12758 return widenVectorOpsToi8(Op, DL, DAG);
12759
12760 // Convert to scalable vectors first.
12761 if (VecVT.isFixedLengthVector()) {
12762 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
12763 SmallVector<SDValue, 8> Ops(Factor);
12764 for (unsigned i = 0U; i < Factor; ++i)
12765 Ops[i] = convertToScalableVector(ContainerVT, Op.getOperand(i), DAG,
12766 Subtarget);
12767
12768 SmallVector<EVT, 8> VTs(Factor, ContainerVT);
12769 SDValue NewInterleave = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs, Ops);
12770
12771 SmallVector<SDValue, 8> Res(Factor);
12772 for (unsigned i = 0U; i < Factor; ++i)
12773 Res[i] = convertFromScalableVector(VecVT, NewInterleave.getValue(i), DAG,
12774 Subtarget);
12775 return DAG.getMergeValues(Res, DL);
12776 }
12777
12778 MVT XLenVT = Subtarget.getXLenVT();
12779 auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
12780
12781 // If the VT is larger than LMUL=8, we need to split and reassemble.
12782 if ((VecVT.getSizeInBits().getKnownMinValue() * Factor) >
12783 (8 * RISCV::RVVBitsPerBlock)) {
12784 SmallVector<SDValue, 8> Ops(Factor * 2);
12785 for (unsigned i = 0; i != Factor; ++i) {
12786 auto [OpLo, OpHi] = DAG.SplitVectorOperand(Op.getNode(), i);
12787 Ops[i] = OpLo;
12788 Ops[i + Factor] = OpHi;
12789 }
12790
12791 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
12792
12793 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12794 ArrayRef(Ops).take_front(Factor)),
12795 DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
12796 ArrayRef(Ops).drop_front(Factor))};
12797
12798 SmallVector<SDValue, 8> Concats(Factor);
12799 for (unsigned i = 0; i != Factor; ++i) {
12800 unsigned IdxLo = 2 * i;
12801 unsigned IdxHi = 2 * i + 1;
12802 Concats[i] = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
12803 Res[IdxLo / Factor].getValue(IdxLo % Factor),
12804 Res[IdxHi / Factor].getValue(IdxHi % Factor));
12805 }
12806
12807 return DAG.getMergeValues(Concats, DL);
12808 }
12809
12810 SDValue Interleaved;
12811
12812 // Spill to the stack using a segment store for simplicity.
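// A vsseg<Factor> store writes its fields interleaved in memory, so reloading
// the slot with unit-stride loads at consecutive VecVT-sized offsets yields
// the interleaved result vectors directly.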
12813 if (Factor != 2) {
12814 EVT MemVT =
12815 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(),
12816 VecVT.getVectorElementCount() * Factor);
12817
12818 // Allocate a stack slot.
12819 Align Alignment = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
12820 SDValue StackPtr =
12821 DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
12822 EVT PtrVT = StackPtr.getValueType();
12823 auto &MF = DAG.getMachineFunction();
12824 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
12825 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
12826
12827 static const Intrinsic::ID IntrIds[] = {
12828 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
12829 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
12830 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
12831 Intrinsic::riscv_vsseg8_mask,
12832 };
12833
12834 unsigned Sz =
12835 Factor * VecVT.getVectorMinNumElements() * VecVT.getScalarSizeInBits();
12836 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, Factor);
12837
12838 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
12839 for (unsigned i = 0; i < Factor; i++)
12840 StoredVal =
12841 DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
12842 Op.getOperand(i), DAG.getTargetConstant(i, DL, MVT::i32));
12843
12844 SDValue Ops[] = {DAG.getEntryNode(),
12845 DAG.getTargetConstant(IntrIds[Factor - 2], DL, XLenVT),
12846 StoredVal,
12847 StackPtr,
12848 Mask,
12849 VL,
12850 DAG.getTargetConstant(Log2_64(VecVT.getScalarSizeInBits()),
12851 DL, XLenVT)};
12852
12853 SDValue Chain = DAG.getMemIntrinsicNode(
12854 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
12855 VecVT.getVectorElementType(), PtrInfo, Alignment,
12856 MachineMemOperand::MOStore);
12857
12858 SmallVector<SDValue, 8> Loads(Factor);
12859
12860 SDValue Increment = DAG.getTypeSize(DL, PtrVT, VecVT.getStoreSize());
12861 for (unsigned i = 0; i != Factor; ++i) {
12862 if (i != 0)
12863 StackPtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, Increment);
12864
12865 Loads[i] = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
12866 }
12867
12868 return DAG.getMergeValues(Loads, DL);
12869 }
12870
12871 // Use ri.vzip2{a,b} if available
12872 // TODO: Figure out the best lowering for the spread variants
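// (ri.vzip2a produces the low half of the full interleaving of V1 and V2 and
// ri.vzip2b the high half, so the two results map directly onto Lo and Hi.)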
12873 if (Subtarget.hasVendorXRivosVizip() && !Op.getOperand(0).isUndef() &&
12874 !Op.getOperand(1).isUndef()) {
12875 // Freeze the sources so we can increase their use count.
12876 SDValue V1 = DAG.getFreeze(Op->getOperand(0));
12877 SDValue V2 = DAG.getFreeze(Op->getOperand(1));
12878 SDValue Lo = lowerVZIP(RISCVISD::RI_VZIP2A_VL, V1, V2, DL, DAG, Subtarget);
12879 SDValue Hi = lowerVZIP(RISCVISD::RI_VZIP2B_VL, V1, V2, DL, DAG, Subtarget);
12880 return DAG.getMergeValues({Lo, Hi}, DL);
12881 }
12882
12883 // If the element type is smaller than ELEN, then we can interleave with
12884 // vwaddu.vv and vwmaccu.vx
12885 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
12886 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
12887 DAG, Subtarget);
12888 } else {
12889 // Otherwise, fall back to using vrgatherei16.vv.
12890 MVT ConcatVT =
12891 MVT::getVectorVT(VecVT.getVectorElementType(),
12892 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
12893 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
12894 Op.getOperand(0), Op.getOperand(1));
12895
12896 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
12897
12898 // 0 1 2 3 4 5 6 7 ...
12899 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
12900
12901 // 1 1 1 1 1 1 1 1 ...
12902 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
12903
12904 // 1 0 1 0 1 0 1 0 ...
12905 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
12906 OddMask = DAG.getSetCC(
12907 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
12908 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
12909 ISD::CondCode::SETNE);
12910
12911 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
12912
12913 // Build up the index vector for interleaving the concatenated vector
12914 // 0 0 1 1 2 2 3 3 ...
12915 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
12916 // 0 n 1 n+1 2 n+2 3 n+3 ...
12917 Idx =
12918 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
12919
12920 // Then perform the interleave
12921 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
12922 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
12923 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
12924 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
12925 }
12926
12927 // Extract the two halves from the interleaved result
12928 SDValue Lo = DAG.getExtractSubvector(DL, VecVT, Interleaved, 0);
12929 SDValue Hi = DAG.getExtractSubvector(DL, VecVT, Interleaved,
12930 VecVT.getVectorMinNumElements());
12931
12932 return DAG.getMergeValues({Lo, Hi}, DL);
12933}
12934
12935 // Lower step_vector to the vid instruction. Any non-identity step value must
12936 // be accounted for by manual expansion.
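// For example, a step of 4 becomes (vid.v << 2), while a non-power-of-two
// step such as 3 becomes (vid.v * 3) via a splatted multiplier.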
12937SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
12938 SelectionDAG &DAG) const {
12939 SDLoc DL(Op);
12940 MVT VT = Op.getSimpleValueType();
12941 assert(VT.isScalableVector() && "Expected scalable vector");
12942 MVT XLenVT = Subtarget.getXLenVT();
12943 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
12944 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
12945 uint64_t StepValImm = Op.getConstantOperandVal(0);
12946 if (StepValImm != 1) {
12947 if (isPowerOf2_64(StepValImm)) {
12948 SDValue StepVal =
12949 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
12950 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
12951 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
12952 } else {
12953 SDValue StepVal = lowerScalarSplat(
12954 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
12955 VL, VT, DL, DAG, Subtarget);
12956 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
12957 }
12958 }
12959 return StepVec;
12960}
12961
12962// Implement vector_reverse using vrgather.vv with indices determined by
12963// subtracting the id of each element from (VLMAX-1). This will convert
12964// the indices like so:
12965// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
12966// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12967SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
12968 SelectionDAG &DAG) const {
12969 SDLoc DL(Op);
12970 MVT VecVT = Op.getSimpleValueType();
12971 if (VecVT.getVectorElementType() == MVT::i1) {
12972 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
12973 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
12974 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
12975 return DAG.getSetCC(DL, VecVT, Op2,
12976 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
12977 }
12978
12979 MVT ContainerVT = VecVT;
12980 SDValue Vec = Op.getOperand(0);
12981 if (VecVT.isFixedLengthVector()) {
12982 ContainerVT = getContainerForFixedLengthVector(VecVT);
12983 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
12984 }
12985
12986 MVT XLenVT = Subtarget.getXLenVT();
12987 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
12988
12989 // On some uarchs vrgather.vv will read from every input register for each
12990 // output register, regardless of the indices. However, to reverse a vector,
12991 // each output register only needs to read from one register. So decompose it
12992 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
12993 // O(LMUL^2).
12994 //
12995 // vsetvli a1, zero, e64, m4, ta, ma
12996 // vrgatherei16.vv v12, v8, v16
12997 // ->
12998 // vsetvli a1, zero, e64, m1, ta, ma
12999 // vrgather.vv v15, v8, v16
13000 // vrgather.vv v14, v9, v16
13001 // vrgather.vv v13, v10, v16
13002 // vrgather.vv v12, v11, v16
13003 if (ContainerVT.bitsGT(RISCVTargetLowering::getM1VT(ContainerVT)) &&
13004 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
13005 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
13006 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
13007 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
13008 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
13009
13010 // Fixed length vectors might not fit exactly into their container, and so
13011 // leave a gap in the front of the vector after being reversed. Slide this
13012 // away.
13013 //
13014 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
13015 // 0 1 2 3 x x x x <- reverse
13016 // x x x x 0 1 2 3 <- vslidedown.vx
13017 if (VecVT.isFixedLengthVector()) {
13018 SDValue Offset = DAG.getNode(
13019 ISD::SUB, DL, XLenVT,
13020 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
13021 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
13022 Concat =
13023 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13024 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
13025 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
13026 }
13027 return Concat;
13028 }
13029
13030 unsigned EltSize = ContainerVT.getScalarSizeInBits();
13031 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
13032 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
13033 unsigned MaxVLMAX =
13034 VecVT.isFixedLengthVector()
13035 ? VecVT.getVectorNumElements()
13036 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
13037
13038 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
13039 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
13040
13041 // If this is SEW=8 and VLMAX is potentially more than 256, we need
13042 // to use vrgatherei16.vv.
13043 if (MaxVLMAX > 256 && EltSize == 8) {
13044 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
13045 // Reverse each half, then reassemble them in reverse order.
13046 // NOTE: It's also possible that after splitting that VLMAX no longer
13047 // requires vrgatherei16.vv.
13048 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
13049 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
13050 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
13051 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
13052 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
13053 // Reassemble the low and high pieces reversed.
13054 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Hi, Lo);
13055 }
13056
13057 // Just promote the int type to i16 which will double the LMUL.
13058 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
13059 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13060 }
13061
13062 // At LMUL > 1, do the index computation in 16 bits to reduce register
13063 // pressure.
13064 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
13065 IntVT.bitsGT(RISCVTargetLowering::getM1VT(IntVT))) {
13066 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
13067 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
13068 IntVT = IntVT.changeVectorElementType(MVT::i16);
13069 }
13070
13071 // Calculate VLMAX-1 for the desired SEW.
13072 SDValue VLMinus1 = DAG.getNode(
13073 ISD::SUB, DL, XLenVT,
13074 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
13075 DAG.getConstant(1, DL, XLenVT));
13076
13077 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
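// On RV32 an i64 element cannot be splatted directly from a single GPR, so
// use vmv.v.x, which sign-extends its XLEN-sized scalar; VLMAX-1 is small and
// non-negative, so the sign extension is exact.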
13078 bool IsRV32E64 =
13079 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
13080 SDValue SplatVL;
13081 if (!IsRV32E64)
13082 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
13083 else
13084 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
13085 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
13086
13087 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
13088 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
13089 DAG.getUNDEF(IntVT), Mask, VL);
13090
13091 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
13092 DAG.getUNDEF(ContainerVT), Mask, VL);
13093 if (VecVT.isFixedLengthVector())
13094 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
13095 return Gather;
13096}
13097
13098SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
13099 SelectionDAG &DAG) const {
13100 SDLoc DL(Op);
13101 SDValue V1 = Op.getOperand(0);
13102 SDValue V2 = Op.getOperand(1);
13103 MVT XLenVT = Subtarget.getXLenVT();
13104 MVT VecVT = Op.getSimpleValueType();
13105
13106 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
13107
13108 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
13109 SDValue DownOffset, UpOffset;
13110 if (ImmValue >= 0) {
13111 // The operand is a TargetConstant, we need to rebuild it as a regular
13112 // constant.
13113 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
13114 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
13115 } else {
13116 // The operand is a TargetConstant, we need to rebuild it as a regular
13117 // constant rather than negating the original operand.
13118 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
13119 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
13120 }
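// E.g. for a non-negative immediate I the result is
// { V1[I..VLMAX-1], V2[0..I-1] }: V1 is slid down by I and V2 is slid up into
// the vacated tail.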
13121
13122 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
13123
13124 SDValue SlideDown = getVSlidedown(
13125 DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
13126 Subtarget.hasVLDependentLatency() ? UpOffset
13127 : DAG.getRegister(RISCV::X0, XLenVT));
13128 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
13129 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
13130 RISCVVType::TAIL_AGNOSTIC);
13131}
13132
13133SDValue
13134RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
13135 SelectionDAG &DAG) const {
13136 SDLoc DL(Op);
13137 auto *Load = cast<LoadSDNode>(Op);
13138
13139 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
13140 Load->getMemoryVT(),
13141 *Load->getMemOperand()) &&
13142 "Expecting a correctly-aligned load");
13143
13144 MVT VT = Op.getSimpleValueType();
13145 MVT XLenVT = Subtarget.getXLenVT();
13146 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13147
13148 // If we know the exact VLEN and our fixed length vector completely fills
13149 // the container, use a whole register load instead.
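// E.g. with a known VLEN of 128, a 128-bit fixed-length vector exactly fills
// an M1 register, so a whole register load can be used and no VL needs to be
// set up.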
13150 const auto [MinVLMAX, MaxVLMAX] =
13151 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
13152 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
13153 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
13154 MachineMemOperand *MMO = Load->getMemOperand();
13155 SDValue NewLoad =
13156 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
13157 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
13158 MMO->getAAInfo(), MMO->getRanges());
13159 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
13160 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
13161 }
13162
13163 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
13164
13165 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
13166 SDValue IntID = DAG.getTargetConstant(
13167 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
13168 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
13169 if (!IsMaskOp)
13170 Ops.push_back(DAG.getUNDEF(ContainerVT));
13171 Ops.push_back(Load->getBasePtr());
13172 Ops.push_back(VL);
13173 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13174 SDValue NewLoad =
13175 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
13176 Load->getMemoryVT(), Load->getMemOperand());
13177
13178 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
13179 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
13180}
13181
13182SDValue
13183RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
13184 SelectionDAG &DAG) const {
13185 SDLoc DL(Op);
13186 auto *Store = cast<StoreSDNode>(Op);
13187
13188 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
13189 Store->getMemoryVT(),
13190 *Store->getMemOperand()) &&
13191 "Expecting a correctly-aligned store");
13192
13193 SDValue StoreVal = Store->getValue();
13194 MVT VT = StoreVal.getSimpleValueType();
13195 MVT XLenVT = Subtarget.getXLenVT();
13196
13197 // If the size is less than a byte, we need to pad with zeros to make a byte.
13198 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
13199 VT = MVT::v8i1;
13200 StoreVal =
13201 DAG.getInsertSubvector(DL, DAG.getConstant(0, DL, VT), StoreVal, 0);
13202 }
13203
13204 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13205
13206 SDValue NewValue =
13207 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
13208
13209 // If we know the exact VLEN and our fixed length vector completely fills
13210 // the container, use a whole register store instead.
13211 const auto [MinVLMAX, MaxVLMAX] =
13212 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
13213 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
13214 RISCVTargetLowering::getM1VT(ContainerVT).bitsLE(ContainerVT)) {
13215 MachineMemOperand *MMO = Store->getMemOperand();
13216 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
13217 MMO->getPointerInfo(), MMO->getBaseAlign(),
13218 MMO->getFlags(), MMO->getAAInfo());
13219 }
13220
13221 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
13222
13223 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
13224 SDValue IntID = DAG.getTargetConstant(
13225 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
13226 return DAG.getMemIntrinsicNode(
13227 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
13228 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
13229 Store->getMemoryVT(), Store->getMemOperand());
13230}
13231
13232SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
13233 SelectionDAG &DAG) const {
13234 SDLoc DL(Op);
13235 MVT VT = Op.getSimpleValueType();
13236
13237 const auto *MemSD = cast<MemSDNode>(Op);
13238 EVT MemVT = MemSD->getMemoryVT();
13239 MachineMemOperand *MMO = MemSD->getMemOperand();
13240 SDValue Chain = MemSD->getChain();
13241 SDValue BasePtr = MemSD->getBasePtr();
13242
13243 SDValue Mask, PassThru, VL;
13244 bool IsExpandingLoad = false;
13245 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
13246 Mask = VPLoad->getMask();
13247 PassThru = DAG.getUNDEF(VT);
13248 VL = VPLoad->getVectorLength();
13249 } else {
13250 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
13251 Mask = MLoad->getMask();
13252 PassThru = MLoad->getPassThru();
13253 IsExpandingLoad = MLoad->isExpandingLoad();
13254 }
13255
13256 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
13257
13258 MVT XLenVT = Subtarget.getXLenVT();
13259
13260 MVT ContainerVT = VT;
13261 if (VT.isFixedLengthVector()) {
13262 ContainerVT = getContainerForFixedLengthVector(VT);
13263 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
13264 if (!IsUnmasked) {
13265 MVT MaskVT = getMaskTypeFor(ContainerVT);
13266 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13267 }
13268 }
13269
13270 if (!VL)
13271 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13272
13273 SDValue ExpandingVL;
13274 if (!IsUnmasked && IsExpandingLoad) {
13275 ExpandingVL = VL;
13276 VL =
13277 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
13278 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
13279 }
13280
13281 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
13282 : Intrinsic::riscv_vle_mask;
13283 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13284 if (IntID == Intrinsic::riscv_vle)
13285 Ops.push_back(DAG.getUNDEF(ContainerVT));
13286 else
13287 Ops.push_back(PassThru);
13288 Ops.push_back(BasePtr);
13289 if (IntID == Intrinsic::riscv_vle_mask)
13290 Ops.push_back(Mask);
13291 Ops.push_back(VL);
13292 if (IntID == Intrinsic::riscv_vle_mask)
13293 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
13294
13295 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
13296
13297 SDValue Result =
13298 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
13299 Chain = Result.getValue(1);
13300 if (ExpandingVL) {
13301 MVT IndexVT = ContainerVT;
13302 if (ContainerVT.isFloatingPoint())
13303 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
13304
13305 MVT IndexEltVT = IndexVT.getVectorElementType();
13306 bool UseVRGATHEREI16 = false;
13307 // If index vector is an i8 vector and the element count exceeds 256, we
13308 // should change the element type of index vector to i16 to avoid
13309 // overflow.
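// viota numbers the active lanes 0, 1, 2, ..., so with more than 256 elements
// those ordinals no longer fit in an i8 index element.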
13310 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
13311 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
13312 assert(getLMUL(IndexVT) != RISCVVType::LMUL_8);
13313 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
13314 UseVRGATHEREI16 = true;
13315 }
13316
13317 SDValue Iota =
13318 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
13319 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
13320 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
13321 Result =
13322 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
13323 : RISCVISD::VRGATHER_VV_VL,
13324 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
13325 }
13326
13327 if (VT.isFixedLengthVector())
13328 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13329
13330 return DAG.getMergeValues({Result, Chain}, DL);
13331}
13332
13333SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
13334 SDLoc DL(Op);
13335 MVT VT = Op->getSimpleValueType(0);
13336
13337 const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
13338 EVT MemVT = VPLoadFF->getMemoryVT();
13339 MachineMemOperand *MMO = VPLoadFF->getMemOperand();
13340 SDValue Chain = VPLoadFF->getChain();
13341 SDValue BasePtr = VPLoadFF->getBasePtr();
13342
13343 SDValue Mask = VPLoadFF->getMask();
13344 SDValue VL = VPLoadFF->getVectorLength();
13345
13346 MVT XLenVT = Subtarget.getXLenVT();
13347
13348 MVT ContainerVT = VT;
13349 if (VT.isFixedLengthVector()) {
13350 ContainerVT = getContainerForFixedLengthVector(VT);
13351 MVT MaskVT = getMaskTypeFor(ContainerVT);
13352 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13353 }
13354
13355 unsigned IntID = Intrinsic::riscv_vleff_mask;
13356 SDValue Ops[] = {
13357 Chain,
13358 DAG.getTargetConstant(IntID, DL, XLenVT),
13359 DAG.getUNDEF(ContainerVT),
13360 BasePtr,
13361 Mask,
13362 VL,
13363 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
13364
13365 SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
13366
13367 SDValue Result =
13368 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
13369 SDValue OutVL = Result.getValue(1);
13370 Chain = Result.getValue(2);
13371
13372 if (VT.isFixedLengthVector())
13373 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
13374
13375 return DAG.getMergeValues({Result, OutVL, Chain}, DL);
13376}
13377
13378SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
13379 SelectionDAG &DAG) const {
13380 SDLoc DL(Op);
13381
13382 const auto *MemSD = cast<MemSDNode>(Op);
13383 EVT MemVT = MemSD->getMemoryVT();
13384 MachineMemOperand *MMO = MemSD->getMemOperand();
13385 SDValue Chain = MemSD->getChain();
13386 SDValue BasePtr = MemSD->getBasePtr();
13387 SDValue Val, Mask, VL;
13388
13389 bool IsCompressingStore = false;
13390 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
13391 Val = VPStore->getValue();
13392 Mask = VPStore->getMask();
13393 VL = VPStore->getVectorLength();
13394 } else {
13395 const auto *MStore = cast<MaskedStoreSDNode>(Op);
13396 Val = MStore->getValue();
13397 Mask = MStore->getMask();
13398 IsCompressingStore = MStore->isCompressingStore();
13399 }
13400
13401 bool IsUnmasked =
13402 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
13403
13404 MVT VT = Val.getSimpleValueType();
13405 MVT XLenVT = Subtarget.getXLenVT();
13406
13407 MVT ContainerVT = VT;
13408 if (VT.isFixedLengthVector()) {
13409 ContainerVT = getContainerForFixedLengthVector(VT);
13410
13411 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13412 if (!IsUnmasked || IsCompressingStore) {
13413 MVT MaskVT = getMaskTypeFor(ContainerVT);
13414 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13415 }
13416 }
13417
13418 if (!VL)
13419 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13420
13421 if (IsCompressingStore) {
13422 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13423 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13424 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
13425 VL =
13426 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
13427 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
13428 }
13429
13430 unsigned IntID =
13431 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
13432 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
13433 Ops.push_back(Val);
13434 Ops.push_back(BasePtr);
13435 if (!IsUnmasked)
13436 Ops.push_back(Mask);
13437 Ops.push_back(VL);
13438
13439 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
13440 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
13441}
13442
13443SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
13444 SelectionDAG &DAG) const {
13445 SDLoc DL(Op);
13446 SDValue Val = Op.getOperand(0);
13447 SDValue Mask = Op.getOperand(1);
13448 SDValue Passthru = Op.getOperand(2);
13449
13450 MVT VT = Val.getSimpleValueType();
13451 MVT XLenVT = Subtarget.getXLenVT();
13452 MVT ContainerVT = VT;
13453 if (VT.isFixedLengthVector()) {
13454 ContainerVT = getContainerForFixedLengthVector(VT);
13455 MVT MaskVT = getMaskTypeFor(ContainerVT);
13456 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
13457 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13458 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
13459 }
13460
13461 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
13462 SDValue Res =
13463 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
13464 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
13465 Passthru, Val, Mask, VL);
13466
13467 if (VT.isFixedLengthVector())
13468 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
13469
13470 return Res;
13471}
13472
13473SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
13474 SelectionDAG &DAG) const {
13475 unsigned Opc = Op.getOpcode();
13476 SDLoc DL(Op);
13477 SDValue Chain = Op.getOperand(0);
13478 SDValue Op1 = Op.getOperand(1);
13479 SDValue Op2 = Op.getOperand(2);
13480 SDValue CC = Op.getOperand(3);
13481 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
13482 MVT VT = Op.getSimpleValueType();
13483 MVT InVT = Op1.getSimpleValueType();
13484
13485 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with the OEQ/UNE
13486 // condition codes.
13487 if (Opc == ISD::STRICT_FSETCCS) {
13488 // Expand strict_fsetccs(x, oeq) to
13489 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
13490 SDVTList VTList = Op->getVTList();
13491 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
13492 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
13493 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13494 Op2, OLECCVal);
13495 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
13496 Op1, OLECCVal);
13497 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
13498 Tmp1.getValue(1), Tmp2.getValue(1));
13499 // Tmp1 and Tmp2 might be the same node.
13500 if (Tmp1 != Tmp2)
13501 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
13502 return DAG.getMergeValues({Tmp1, OutChain}, DL);
13503 }
13504
13505 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
13506 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
13507 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
13508 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
13509 Op2, OEQCCVal);
13510 SDValue Res = DAG.getNOT(DL, OEQ, VT);
13511 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
13512 }
13513 }
13514
13515 MVT ContainerInVT = InVT;
13516 if (InVT.isFixedLengthVector()) {
13517 ContainerInVT = getContainerForFixedLengthVector(InVT);
13518 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
13519 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
13520 }
13521 MVT MaskVT = getMaskTypeFor(ContainerInVT);
13522
13523 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
13524
13525 SDValue Res;
13526 if (Opc == ISD::STRICT_FSETCC &&
13527 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
13528 CCVal == ISD::SETOLE)) {
13529 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
13530 // is only active when both input elements are ordered.
13531 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
13532 SDValue OrderMask1 = DAG.getNode(
13533 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13534 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13535 True, VL});
13536 SDValue OrderMask2 = DAG.getNode(
13537 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
13538 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
13539 True, VL});
13540 Mask =
13541 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
13542 // Use Mask as the passthru operand to let the result be 0 if either of the
13543 // inputs is unordered.
13544 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
13545 DAG.getVTList(MaskVT, MVT::Other),
13546 {Chain, Op1, Op2, CC, Mask, Mask, VL});
13547 } else {
13548 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
13549 : RISCVISD::STRICT_FSETCCS_VL;
13550 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
13551 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
13552 }
13553
13554 if (VT.isFixedLengthVector()) {
13555 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
13556 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
13557 }
13558 return Res;
13559}
13560
13561// Lower vector ABS to smax(X, sub(0, X)).
13562SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
13563 SDLoc DL(Op);
13564 MVT VT = Op.getSimpleValueType();
13565 SDValue X = Op.getOperand(0);
13566
13567 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
13568 "Unexpected type for ISD::ABS");
13569
13570 MVT ContainerVT = VT;
13571 if (VT.isFixedLengthVector()) {
13572 ContainerVT = getContainerForFixedLengthVector(VT);
13573 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
13574 }
13575
13576 SDValue Mask, VL;
13577 if (Op->getOpcode() == ISD::VP_ABS) {
13578 Mask = Op->getOperand(1);
13579 if (VT.isFixedLengthVector())
13580 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
13581 Subtarget);
13582 VL = Op->getOperand(2);
13583 } else
13584 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13585
13586 SDValue SplatZero = DAG.getNode(
13587 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
13588 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
13589 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
13590 DAG.getUNDEF(ContainerVT), Mask, VL);
13591 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
13592 DAG.getUNDEF(ContainerVT), Mask, VL);
13593
13594 if (VT.isFixedLengthVector())
13595 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
13596 return Max;
13597}
13598
13599SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
13600 SelectionDAG &DAG) const {
13601 const auto &TSInfo =
13602 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13603
13604 unsigned NewOpc = getRISCVVLOp(Op);
13605 bool HasPassthruOp = TSInfo.hasPassthruOp(NewOpc);
13606 bool HasMask = TSInfo.hasMaskOp(NewOpc);
13607
13608 MVT VT = Op.getSimpleValueType();
13609 MVT ContainerVT = getContainerForFixedLengthVector(VT);
13610
13611 // Create list of operands by converting existing ones to scalable types.
13612 SmallVector<SDValue, 6> Ops;
13613 for (const SDValue &V : Op->op_values()) {
13614 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13615
13616 // Pass through non-vector operands.
13617 if (!V.getValueType().isVector()) {
13618 Ops.push_back(V);
13619 continue;
13620 }
13621
13622 // "cast" fixed length vector to a scalable vector.
13623 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
13624 "Only fixed length vectors are supported!");
13625 MVT VContainerVT = ContainerVT.changeVectorElementType(
13626 V.getSimpleValueType().getVectorElementType());
13627 Ops.push_back(convertToScalableVector(VContainerVT, V, DAG, Subtarget));
13628 }
13629
13630 SDLoc DL(Op);
13631 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
13632 if (HasPassthruOp)
13633 Ops.push_back(DAG.getUNDEF(ContainerVT));
13634 if (HasMask)
13635 Ops.push_back(Mask);
13636 Ops.push_back(VL);
13637
13638 // StrictFP operations have two result values. Their lowered result should
13639 // have the same result count.
13640 if (Op->isStrictFPOpcode()) {
13641 SDValue ScalableRes =
13642 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
13643 Op->getFlags());
13644 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13645 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
13646 }
13647
13648 SDValue ScalableRes =
13649 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
13650 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
13651}
13652
13653// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
13654// * Operands of each node are assumed to be in the same order.
13655// * The EVL operand is promoted from i32 to i64 on RV64.
13656// * Fixed-length vectors are converted to their scalable-vector container
13657// types.
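// For example, a fixed-length vp.add(%a, %b, %mask, %evl) is lowered here
// (roughly) to ADD_VL(%a, %b, undef passthru, %mask, %evl) on the
// corresponding scalable container type.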
13658SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
13659 const auto &TSInfo =
13660 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
13661
13662 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13663 bool HasPassthruOp = TSInfo.hasPassthruOp(RISCVISDOpc);
13664
13665 SDLoc DL(Op);
13666 MVT VT = Op.getSimpleValueType();
13667 SmallVector<SDValue, 16> Ops;
13668
13669 MVT ContainerVT = VT;
13670 if (VT.isFixedLengthVector())
13671 ContainerVT = getContainerForFixedLengthVector(VT);
13672
13673 for (const auto &OpIdx : enumerate(Op->ops())) {
13674 SDValue V = OpIdx.value();
13675 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
13676 // Add dummy passthru value before the mask. Or if there isn't a mask,
13677 // before EVL.
13678 if (HasPassthruOp) {
13679 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
13680 if (MaskIdx) {
13681 if (*MaskIdx == OpIdx.index())
13682 Ops.push_back(DAG.getUNDEF(ContainerVT));
13683 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
13684 OpIdx.index()) {
13685 if (Op.getOpcode() == ISD::VP_MERGE) {
13686 // For VP_MERGE, copy the false operand instead of an undef value.
13687 Ops.push_back(Ops.back());
13688 } else {
13689 assert(Op.getOpcode() == ISD::VP_SELECT);
13690 // For VP_SELECT, add an undef value.
13691 Ops.push_back(DAG.getUNDEF(ContainerVT));
13692 }
13693 }
13694 }
13695 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
13696 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
13697 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
13698 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
13699 Subtarget.getXLenVT()));
13700 // Pass through operands which aren't fixed-length vectors.
13701 if (!V.getValueType().isFixedLengthVector()) {
13702 Ops.push_back(V);
13703 continue;
13704 }
13705 // "cast" fixed length vector to a scalable vector.
13706 MVT OpVT = V.getSimpleValueType();
13707 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
13708 assert(useRVVForFixedLengthVectorVT(OpVT) &&
13709 "Only fixed length vectors are supported!");
13710 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
13711 }
13712
13713 if (!VT.isFixedLengthVector())
13714 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
13715
13716 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
13717
13718 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
13719}
13720
13721SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
13722 SelectionDAG &DAG) const {
13723 SDLoc DL(Op);
13724 MVT VT = Op.getSimpleValueType();
13725
13726 SDValue Src = Op.getOperand(0);
13727 // NOTE: Mask is dropped.
13728 SDValue VL = Op.getOperand(2);
13729
13730 MVT ContainerVT = VT;
13731 if (VT.isFixedLengthVector()) {
13732 ContainerVT = getContainerForFixedLengthVector(VT);
13733 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
13734 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13735 }
13736
13737 MVT XLenVT = Subtarget.getXLenVT();
13738 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13739 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13740 DAG.getUNDEF(ContainerVT), Zero, VL);
13741
13742 SDValue SplatValue = DAG.getSignedConstant(
13743 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
13744 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13745 DAG.getUNDEF(ContainerVT), SplatValue, VL);
13746
13747 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
13748 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
13749 if (!VT.isFixedLengthVector())
13750 return Result;
13751 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13752}
13753
13754SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
13755 SelectionDAG &DAG) const {
13756 SDLoc DL(Op);
13757 MVT VT = Op.getSimpleValueType();
13758
13759 SDValue Op1 = Op.getOperand(0);
13760 SDValue Op2 = Op.getOperand(1);
13761 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13762 // NOTE: Mask is dropped.
13763 SDValue VL = Op.getOperand(4);
13764
13765 MVT ContainerVT = VT;
13766 if (VT.isFixedLengthVector()) {
13767 ContainerVT = getContainerForFixedLengthVector(VT);
13768 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
13769 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
13770 }
13771
13772 SDValue Result;
13773 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
13774
13775 switch (Condition) {
13776 default:
13777 break;
13778 // X != Y --> (X^Y)
13779 case ISD::SETNE:
13780 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13781 break;
13782 // X == Y --> ~(X^Y)
13783 case ISD::SETEQ: {
13784 SDValue Temp =
13785 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
13786 Result =
13787 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
13788 break;
13789 }
13790 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
13791 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
13792 case ISD::SETGT:
13793 case ISD::SETULT: {
13794 SDValue Temp =
13795 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13796 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
13797 break;
13798 }
13799 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
13800 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
13801 case ISD::SETLT:
13802 case ISD::SETUGT: {
13803 SDValue Temp =
13804 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13805 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
13806 break;
13807 }
13808 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
13809 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
13810 case ISD::SETGE:
13811 case ISD::SETULE: {
13812 SDValue Temp =
13813 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
13814 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
13815 break;
13816 }
13817 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
13818 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
13819 case ISD::SETLE:
13820 case ISD::SETUGE: {
13821 SDValue Temp =
13822 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
13823 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
13824 break;
13825 }
13826 }
13827
13828 if (!VT.isFixedLengthVector())
13829 return Result;
13830 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13831}
13832
13833// Lower Floating-Point/Integer Type-Convert VP SDNodes
13834SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
13835 SelectionDAG &DAG) const {
13836 SDLoc DL(Op);
13837
13838 SDValue Src = Op.getOperand(0);
13839 SDValue Mask = Op.getOperand(1);
13840 SDValue VL = Op.getOperand(2);
13841 unsigned RISCVISDOpc = getRISCVVLOp(Op);
13842
13843 MVT DstVT = Op.getSimpleValueType();
13844 MVT SrcVT = Src.getSimpleValueType();
13845 if (DstVT.isFixedLengthVector()) {
13846 DstVT = getContainerForFixedLengthVector(DstVT);
13847 SrcVT = getContainerForFixedLengthVector(SrcVT);
13848 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
13849 MVT MaskVT = getMaskTypeFor(DstVT);
13850 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
13851 }
13852
13853 unsigned DstEltSize = DstVT.getScalarSizeInBits();
13854 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13855
13856 SDValue Result;
13857 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
13858 if (SrcVT.isInteger()) {
13859 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13860
13861 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
13862 ? RISCVISD::VSEXT_VL
13863 : RISCVISD::VZEXT_VL;
13864
13865 // Do we need to do any pre-widening before converting?
13866 if (SrcEltSize == 1) {
13867 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
13868 MVT XLenVT = Subtarget.getXLenVT();
13869 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
13870 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13871 DAG.getUNDEF(IntVT), Zero, VL);
13872 SDValue One = DAG.getSignedConstant(
13873 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
13874 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
13875 DAG.getUNDEF(IntVT), One, VL);
13876 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
13877 ZeroSplat, DAG.getUNDEF(IntVT), VL);
13878 } else if (DstEltSize > (2 * SrcEltSize)) {
13879 // Widen before converting.
13880 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
13881 DstVT.getVectorElementCount());
13882 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
13883 }
13884
13885 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13886 } else {
13887 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13888 "Wrong input/output vector types");
13889
13890 // Convert f16 to f32 then convert f32 to i64.
13891 if (DstEltSize > (2 * SrcEltSize)) {
13892 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13893 MVT InterimFVT =
13894 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13895 Src =
13896 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
13897 }
13898
13899 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
13900 }
13901 } else { // Narrowing + Conversion
13902 if (SrcVT.isInteger()) {
13903 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
13904 // First do a narrowing conversion to an FP type half the size, then round
13905 // the FP type to a smaller FP type if needed.
13906
13907 MVT InterimFVT = DstVT;
13908 if (SrcEltSize > (2 * DstEltSize)) {
13909 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
13910 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
13911 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
13912 }
13913
13914 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
13915
13916 if (InterimFVT != DstVT) {
13917 Src = Result;
13918 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
13919 }
13920 } else {
13921 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
13922 "Wrong input/output vector types");
13923 // First do a narrowing conversion to an integer half the size, then
13924 // truncate if needed.
13925
13926 if (DstEltSize == 1) {
13927 // First convert to the same size integer, then convert to mask using
13928 // setcc.
13929 assert(SrcEltSize >= 16 && "Unexpected FP type!");
13930 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
13931 DstVT.getVectorElementCount());
13932 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13933
13934 // Compare the integer result to 0. The integer should be 0 or 1/-1,
13935 // otherwise the conversion was undefined.
13936 MVT XLenVT = Subtarget.getXLenVT();
13937 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
13938 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
13939 DAG.getUNDEF(InterimIVT), SplatZero, VL);
13940 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
13941 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
13942 DAG.getUNDEF(DstVT), Mask, VL});
13943 } else {
13944 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13945 DstVT.getVectorElementCount());
13946
13947 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
13948
13949 while (InterimIVT != DstVT) {
13950 SrcEltSize /= 2;
13951 Src = Result;
13952 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
13953 DstVT.getVectorElementCount());
13954 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
13955 Src, Mask, VL);
13956 }
13957 }
13958 }
13959 }
13960
13961 MVT VT = Op.getSimpleValueType();
13962 if (!VT.isFixedLengthVector())
13963 return Result;
13964 return convertFromScalableVector(VT, Result, DAG, Subtarget);
13965}
13966
13967SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
13968 SelectionDAG &DAG) const {
13969 SDLoc DL(Op);
13970 MVT VT = Op.getSimpleValueType();
13971 MVT XLenVT = Subtarget.getXLenVT();
13972
13973 SDValue Mask = Op.getOperand(0);
13974 SDValue TrueVal = Op.getOperand(1);
13975 SDValue FalseVal = Op.getOperand(2);
13976 SDValue VL = Op.getOperand(3);
13977
13978 // Use default legalization if a vector of EVL type would be legal.
13979 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
13980 VT.getVectorElementCount());
13981 if (isTypeLegal(EVLVecVT))
13982 return SDValue();
13983
13984 MVT ContainerVT = VT;
13985 if (VT.isFixedLengthVector()) {
13986 ContainerVT = getContainerForFixedLengthVector(VT);
13987 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
13988 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
13989 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
13990 }
13991
13992 // Promote to a vector of i8.
13993 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
13994
13995 // Promote TrueVal and FalseVal using VLMax.
13996 // FIXME: Is there a better way to do this?
13997 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
13998 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
13999 DAG.getUNDEF(PromotedVT),
14000 DAG.getConstant(1, DL, XLenVT), VLMax);
14001 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
14002 DAG.getUNDEF(PromotedVT),
14003 DAG.getConstant(0, DL, XLenVT), VLMax);
14004 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
14005 SplatZero, DAG.getUNDEF(PromotedVT), VL);
14006 // Any element past VL uses FalseVal, so use VLMax
14007 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
14008 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
14009
14010 // VP_MERGE the two promoted values.
14011 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
14012 TrueVal, FalseVal, FalseVal, VL);
14013
14014 // Convert back to mask.
14015 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
14016 SDValue Result = DAG.getNode(
14017 RISCVISD::SETCC_VL, DL, ContainerVT,
14018 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
14019 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
14020
14021 if (VT.isFixedLengthVector())
14022 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14023 return Result;
14024}
14025
14026SDValue
14027RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
14028 SelectionDAG &DAG) const {
14029 using namespace SDPatternMatch;
14030
14031 SDLoc DL(Op);
14032
14033 SDValue Op1 = Op.getOperand(0);
14034 SDValue Op2 = Op.getOperand(1);
14035 SDValue Offset = Op.getOperand(2);
14036 SDValue Mask = Op.getOperand(3);
14037 SDValue EVL1 = Op.getOperand(4);
14038 SDValue EVL2 = Op.getOperand(5);
14039
14040 const MVT XLenVT = Subtarget.getXLenVT();
14041 MVT VT = Op.getSimpleValueType();
14042 MVT ContainerVT = VT;
14043 if (VT.isFixedLengthVector()) {
14044 ContainerVT = getContainerForFixedLengthVector(VT);
14045 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14046 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
14047 MVT MaskVT = getMaskTypeFor(ContainerVT);
14048 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14049 }
14050
14051 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
14052 if (IsMaskVector) {
14053 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
14054
14055 // Expand input operands
14056 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14057 DAG.getUNDEF(ContainerVT),
14058 DAG.getConstant(1, DL, XLenVT), EVL1);
14059 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14060 DAG.getUNDEF(ContainerVT),
14061 DAG.getConstant(0, DL, XLenVT), EVL1);
14062 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
14063 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
14064
14065 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14066 DAG.getUNDEF(ContainerVT),
14067 DAG.getConstant(1, DL, XLenVT), EVL2);
14068 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
14069 DAG.getUNDEF(ContainerVT),
14070 DAG.getConstant(0, DL, XLenVT), EVL2);
14071 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
14072 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
14073 }
14074
14075 auto getVectorFirstEle = [](SDValue Vec) {
14076 SDValue FirstEle;
14077 if (sd_match(Vec, m_InsertElt(m_Value(), m_Value(FirstEle), m_Zero())))
14078 return FirstEle;
14079
14080 if (Vec.getOpcode() == ISD::SPLAT_VECTOR ||
14082 return Vec.getOperand(0);
14083
14084 return SDValue();
14085 };
14086
14087 if (!IsMaskVector && isNullConstant(Offset) && isOneConstant(EVL1))
14088 if (auto FirstEle = getVectorFirstEle(Op->getOperand(0))) {
14089 MVT EltVT = ContainerVT.getVectorElementType();
14090 SDValue Result;
14091 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
14092 EltVT == MVT::bf16) {
14093 EltVT = EltVT.changeTypeToInteger();
14094 ContainerVT = ContainerVT.changeVectorElementType(EltVT);
14095 Op2 = DAG.getBitcast(ContainerVT, Op2);
14096 FirstEle =
14097 DAG.getAnyExtOrTrunc(DAG.getBitcast(EltVT, FirstEle), DL, XLenVT);
14098 }
14099 Result = DAG.getNode(EltVT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL
14100 : RISCVISD::VSLIDE1UP_VL,
14101 DL, ContainerVT, DAG.getUNDEF(ContainerVT), Op2,
14102 FirstEle, Mask, EVL2);
14103 Result = DAG.getBitcast(
14105 Result);
14106 return VT.isFixedLengthVector()
14107 ? convertFromScalableVector(VT, Result, DAG, Subtarget)
14108 : Result;
14109 }
14110
14111 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
14112 SDValue DownOffset, UpOffset;
14113 if (ImmValue >= 0) {
14114 // The operand is a TargetConstant; we need to rebuild it as a regular
14115 // constant.
14116 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
14117 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
14118 } else {
14119 // The operand is a TargetConstant; we need to rebuild it as a regular
14120 // constant rather than negating the original operand.
14121 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
14122 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
14123 }
14124
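// For example, with EVL1 = 8 and Offset = 2: DownOffset = 2 and UpOffset = 6,
// so Op1 is slid down by two elements and Op2 is slid up starting at element 6,
// yielding Op1[2..8) followed by Op2.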
14125 if (ImmValue != 0)
14126 Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
14127 DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
14128 Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
14129 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
14130 UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
14131
14132 if (IsMaskVector) {
14133 // Truncate Result back to a mask vector (Result has same EVL as Op2)
14134 Result = DAG.getNode(
14135 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
14136 {Result, DAG.getConstant(0, DL, ContainerVT),
14137 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
14138 Mask, EVL2});
14139 }
14140
14141 if (!VT.isFixedLengthVector())
14142 return Result;
14143 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14144}
14145
14146SDValue
14147RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
14148 SelectionDAG &DAG) const {
14149 SDLoc DL(Op);
14150 MVT VT = Op.getSimpleValueType();
14151 MVT XLenVT = Subtarget.getXLenVT();
14152
14153 SDValue Op1 = Op.getOperand(0);
14154 SDValue Mask = Op.getOperand(1);
14155 SDValue EVL = Op.getOperand(2);
14156
14157 MVT ContainerVT = VT;
14158 if (VT.isFixedLengthVector()) {
14159 ContainerVT = getContainerForFixedLengthVector(VT);
14160 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14161 MVT MaskVT = getMaskTypeFor(ContainerVT);
14162 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14163 }
14164
14165 MVT GatherVT = ContainerVT;
14166 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
14167 // Check if we are working with mask vectors
14168 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
14169 if (IsMaskVector) {
14170 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
14171
14172 // Expand input operand
14173 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
14174 DAG.getUNDEF(IndicesVT),
14175 DAG.getConstant(1, DL, XLenVT), EVL);
14176 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
14177 DAG.getUNDEF(IndicesVT),
14178 DAG.getConstant(0, DL, XLenVT), EVL);
14179 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
14180 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
14181 }
14182
14183 unsigned EltSize = GatherVT.getScalarSizeInBits();
14184 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
14185 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
14186 unsigned MaxVLMAX =
14187 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
14188
14189 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
14190 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
14191 // to use vrgatherei16.vv.
14192 // TODO: It's also possible to use vrgatherei16.vv for other types to
14193 // decrease register width for the index calculation.
14194 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
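// (An 8-bit index can only address 256 elements, so once VLMAX may exceed 256
// the indices are widened to i16 via vrgatherei16.)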
14195 if (MaxVLMAX > 256 && EltSize == 8) {
14196 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
14197 // Split the vector in half and reverse each half using a full register
14198 // reverse.
14199 // Swap the halves and concatenate them.
14200 // Slide the concatenated result by (VLMax - VL).
14201 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
14202 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
14203 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
14204
14205 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
14206 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
14207
14208 // Reassemble the low and high pieces reversed.
14209 // NOTE: this Result is unmasked (because we do not need masks for
14210 // shuffles). If in the future this has to change, we can use a SELECT_VL
14211 // between Result and UNDEF using the mask originally passed to VP_REVERSE
14212 SDValue Result =
14213 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
14214
14215 // Slide off any elements from past EVL that were reversed into the low
14216 // elements.
14217 SDValue VLMax =
14218 DAG.getElementCount(DL, XLenVT, GatherVT.getVectorElementCount());
14219 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
14220
14221 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
14222 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
14223
14224 if (IsMaskVector) {
14225 // Truncate Result back to a mask vector
14226 Result =
14227 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
14228 {Result, DAG.getConstant(0, DL, GatherVT),
14229 DAG.getCondCode(ISD::SETNE),
14230 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
14231 }
14232
14233 if (!VT.isFixedLengthVector())
14234 return Result;
14235 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14236 }
14237
14238 // Just promote the int type to i16 which will double the LMUL.
14239 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
14240 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
14241 }
14242
14243 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
14244 SDValue VecLen =
14245 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
14246 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
14247 DAG.getUNDEF(IndicesVT), VecLen, EVL);
14248 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
14249 DAG.getUNDEF(IndicesVT), Mask, EVL);
14250 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
14251 DAG.getUNDEF(GatherVT), Mask, EVL);
14252
14253 if (IsMaskVector) {
14254 // Truncate Result back to a mask vector
14255 Result = DAG.getNode(
14256 RISCVISD::SETCC_VL, DL, ContainerVT,
14257 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
14258 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
14259 }
14260
14261 if (!VT.isFixedLengthVector())
14262 return Result;
14263 return convertFromScalableVector(VT, Result, DAG, Subtarget);
14264}
14265
14266SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
14267 SelectionDAG &DAG) const {
14268 MVT VT = Op.getSimpleValueType();
14269 if (VT.getVectorElementType() != MVT::i1)
14270 return lowerVPOp(Op, DAG);
14271
14272 // It is safe to drop the mask parameter as masked-off elements are undef.
14273 SDValue Op1 = Op->getOperand(0);
14274 SDValue Op2 = Op->getOperand(1);
14275 SDValue VL = Op->getOperand(3);
14276
14277 MVT ContainerVT = VT;
14278 const bool IsFixed = VT.isFixedLengthVector();
14279 if (IsFixed) {
14280 ContainerVT = getContainerForFixedLengthVector(VT);
14281 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
14282 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
14283 }
14284
14285 SDLoc DL(Op);
14286 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
14287 if (!IsFixed)
14288 return Val;
14289 return convertFromScalableVector(VT, Val, DAG, Subtarget);
14290}
14291
14292SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
14293 SelectionDAG &DAG) const {
14294 SDLoc DL(Op);
14295 MVT XLenVT = Subtarget.getXLenVT();
14296 MVT VT = Op.getSimpleValueType();
14297 MVT ContainerVT = VT;
14298 if (VT.isFixedLengthVector())
14299 ContainerVT = getContainerForFixedLengthVector(VT);
14300
14301 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14302
14303 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
14304 // Check if the mask is known to be all ones
14305 SDValue Mask = VPNode->getMask();
14306 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14307
14308 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
14309 : Intrinsic::riscv_vlse_mask,
14310 DL, XLenVT);
14311 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
14312 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
14313 VPNode->getStride()};
14314 if (!IsUnmasked) {
14315 if (VT.isFixedLengthVector()) {
14316 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14317 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14318 }
14319 Ops.push_back(Mask);
14320 }
14321 Ops.push_back(VPNode->getVectorLength());
14322 if (!IsUnmasked) {
14323 SDValue Policy =
14324 DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
14325 Ops.push_back(Policy);
14326 }
14327
14328 SDValue Result =
14329 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
14330 VPNode->getMemoryVT(), VPNode->getMemOperand());
14331 SDValue Chain = Result.getValue(1);
14332
14333 if (VT.isFixedLengthVector())
14334 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14335
14336 return DAG.getMergeValues({Result, Chain}, DL);
14337}
14338
14339SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
14340 SelectionDAG &DAG) const {
14341 SDLoc DL(Op);
14342 MVT XLenVT = Subtarget.getXLenVT();
14343
14344 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
14345 SDValue StoreVal = VPNode->getValue();
14346 MVT VT = StoreVal.getSimpleValueType();
14347 MVT ContainerVT = VT;
14348 if (VT.isFixedLengthVector()) {
14349 ContainerVT = getContainerForFixedLengthVector(VT);
14350 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
14351 }
14352
14353 // Check if the mask is known to be all ones
14354 SDValue Mask = VPNode->getMask();
14355 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14356
14357 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
14358 : Intrinsic::riscv_vsse_mask,
14359 DL, XLenVT);
14360 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
14361 VPNode->getBasePtr(), VPNode->getStride()};
14362 if (!IsUnmasked) {
14363 if (VT.isFixedLengthVector()) {
14364 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
14365 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14366 }
14367 Ops.push_back(Mask);
14368 }
14369 Ops.push_back(VPNode->getVectorLength());
14370
14371 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
14372 Ops, VPNode->getMemoryVT(),
14373 VPNode->getMemOperand());
14374}
14375
14376// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
14377// matched to a RVV indexed load. The RVV indexed load instructions only
14378// support the "unsigned unscaled" addressing mode; indices are implicitly
14379// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14380// signed or scaled indexing is extended to the XLEN value type and scaled
14381// accordingly.
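// For example, on RV32 an index vector wider than XLEN is truncated below, and
// the load is emitted as the riscv_vluxei / riscv_vluxei_mask intrinsic with
// byte-offset indices.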
14382SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
14383 SelectionDAG &DAG) const {
14384 SDLoc DL(Op);
14385 MVT VT = Op.getSimpleValueType();
14386
14387 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14388 EVT MemVT = MemSD->getMemoryVT();
14389 MachineMemOperand *MMO = MemSD->getMemOperand();
14390 SDValue Chain = MemSD->getChain();
14391 SDValue BasePtr = MemSD->getBasePtr();
14392
14393 [[maybe_unused]] ISD::LoadExtType LoadExtType;
14394 SDValue Index, Mask, PassThru, VL;
14395
14396 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
14397 Index = VPGN->getIndex();
14398 Mask = VPGN->getMask();
14399 PassThru = DAG.getUNDEF(VT);
14400 VL = VPGN->getVectorLength();
14401 // VP doesn't support extending loads.
14402 LoadExtType = ISD::NON_EXTLOAD;
14403 } else {
14404 // Else it must be a MGATHER.
14405 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
14406 Index = MGN->getIndex();
14407 Mask = MGN->getMask();
14408 PassThru = MGN->getPassThru();
14409 LoadExtType = MGN->getExtensionType();
14410 }
14411
14412 MVT IndexVT = Index.getSimpleValueType();
14413 MVT XLenVT = Subtarget.getXLenVT();
14414
14416 "Unexpected VTs!");
14417 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14418 // Targets have to explicitly opt-in for extending vector loads.
14419 assert(LoadExtType == ISD::NON_EXTLOAD &&
14420 "Unexpected extending MGATHER/VP_GATHER");
14421
14422 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14423 // the selection of the masked intrinsics doesn't do this for us.
14424 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14425
14426 MVT ContainerVT = VT;
14427 if (VT.isFixedLengthVector()) {
14428 ContainerVT = getContainerForFixedLengthVector(VT);
14429 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14430 ContainerVT.getVectorElementCount());
14431
14432 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14433
14434 if (!IsUnmasked) {
14435 MVT MaskVT = getMaskTypeFor(ContainerVT);
14436 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14437 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
14438 }
14439 }
14440
14441 if (!VL)
14442 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14443
14444 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14445 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14446 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14447 }
14448
14449 unsigned IntID =
14450 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
14451 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14452 if (IsUnmasked)
14453 Ops.push_back(DAG.getUNDEF(ContainerVT));
14454 else
14455 Ops.push_back(PassThru);
14456 Ops.push_back(BasePtr);
14457 Ops.push_back(Index);
14458 if (!IsUnmasked)
14459 Ops.push_back(Mask);
14460 Ops.push_back(VL);
14461 if (!IsUnmasked)
14462 Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
14463
14464 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
14465 SDValue Result =
14466 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
14467 Chain = Result.getValue(1);
14468
14469 if (VT.isFixedLengthVector())
14470 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
14471
14472 return DAG.getMergeValues({Result, Chain}, DL);
14473}
14474
14475// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
14476// matched to a RVV indexed store. The RVV indexed store instructions only
14477// support the "unsigned unscaled" addressing mode; indices are implicitly
14478// zero-extended or truncated to XLEN and are treated as byte offsets. Any
14479// signed or scaled indexing is extended to the XLEN value type and scaled
14480// accordingly.
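// As with gathers above, oversized indices are truncated to XLEN and the store
// is emitted as the riscv_vsoxei / riscv_vsoxei_mask intrinsic.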
14481SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
14482 SelectionDAG &DAG) const {
14483 SDLoc DL(Op);
14484 const auto *MemSD = cast<MemSDNode>(Op.getNode());
14485 EVT MemVT = MemSD->getMemoryVT();
14486 MachineMemOperand *MMO = MemSD->getMemOperand();
14487 SDValue Chain = MemSD->getChain();
14488 SDValue BasePtr = MemSD->getBasePtr();
14489
14490 [[maybe_unused]] bool IsTruncatingStore = false;
14491 SDValue Index, Mask, Val, VL;
14492
14493 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
14494 Index = VPSN->getIndex();
14495 Mask = VPSN->getMask();
14496 Val = VPSN->getValue();
14497 VL = VPSN->getVectorLength();
14498 // VP doesn't support truncating stores.
14499 IsTruncatingStore = false;
14500 } else {
14501 // Else it must be a MSCATTER.
14502 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
14503 Index = MSN->getIndex();
14504 Mask = MSN->getMask();
14505 Val = MSN->getValue();
14506 IsTruncatingStore = MSN->isTruncatingStore();
14507 }
14508
14509 MVT VT = Val.getSimpleValueType();
14510 MVT IndexVT = Index.getSimpleValueType();
14511 MVT XLenVT = Subtarget.getXLenVT();
14512
14514 "Unexpected VTs!");
14515 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
14516 // Targets have to explicitly opt-in for extending vector loads and
14517 // truncating vector stores.
14518 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
14519
14520 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
14521 // the selection of the masked intrinsics doesn't do this for us.
14522 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
14523
14524 MVT ContainerVT = VT;
14525 if (VT.isFixedLengthVector()) {
14526 ContainerVT = getContainerForFixedLengthVector(VT);
14527 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
14528 ContainerVT.getVectorElementCount());
14529
14530 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
14531 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
14532
14533 if (!IsUnmasked) {
14534 MVT MaskVT = getMaskTypeFor(ContainerVT);
14535 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
14536 }
14537 }
14538
14539 if (!VL)
14540 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
14541
14542 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
14543 IndexVT = IndexVT.changeVectorElementType(XLenVT);
14544 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
14545 }
14546
14547 unsigned IntID =
14548 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
14549 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
14550 Ops.push_back(Val);
14551 Ops.push_back(BasePtr);
14552 Ops.push_back(Index);
14553 if (!IsUnmasked)
14554 Ops.push_back(Mask);
14555 Ops.push_back(VL);
14556
14557 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
14558 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
14559}
14560
14561SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
14562 SelectionDAG &DAG) const {
14563 const MVT XLenVT = Subtarget.getXLenVT();
14564 SDLoc DL(Op);
14565 SDValue Chain = Op->getOperand(0);
14566 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14567 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14568 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14569
14570 // Encoding used for rounding mode in RISC-V differs from that used in
14571 // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index in
14572 // a table, which consists of a sequence of 4-bit fields, each representing the
14573 // corresponding FLT_ROUNDS mode.
14574 static const int Table =
14575 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
14576 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
14577 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
14578 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
14579 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
14580
14581 SDValue Shift =
14582 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
14583 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14584 DAG.getConstant(Table, DL, XLenVT), Shift);
14585 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14586 DAG.getConstant(7, DL, XLenVT));
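// Worked example: with frm = RTZ (encoded as 1), Shift is 4, so (Table >> 4) & 7
// extracts the 4-bit field holding the FLT_ROUNDS "toward zero" value.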
14587
14588 return DAG.getMergeValues({Masked, Chain}, DL);
14589}
14590
14591SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
14592 SelectionDAG &DAG) const {
14593 const MVT XLenVT = Subtarget.getXLenVT();
14594 SDLoc DL(Op);
14595 SDValue Chain = Op->getOperand(0);
14596 SDValue RMValue = Op->getOperand(1);
14597 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
14598
14599 // Encoding used for rounding mode in RISC-V differs from that used in
14600 // FLT_ROUNDS. To convert it, the C rounding mode is used as an index in
14601 // a table, which consists of a sequence of 4-bit fields, each representing
14602 // the corresponding RISC-V mode.
14603 static const unsigned Table =
14604 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
14605 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
14606 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
14607 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
14608 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
14609
14610 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
14611
14612 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
14613 DAG.getConstant(2, DL, XLenVT));
14614 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
14615 DAG.getConstant(Table, DL, XLenVT), Shift);
14616 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
14617 DAG.getConstant(0x7, DL, XLenVT));
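// This is the inverse of the table in lowerGET_ROUNDING: the FLT_ROUNDS value
// selects the 4-bit field holding the frm encoding that is written to the CSR.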
14618 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14619 RMValue);
14620}
14621
14622SDValue RISCVTargetLowering::lowerGET_FPENV(SDValue Op,
14623 SelectionDAG &DAG) const {
14624 const MVT XLenVT = Subtarget.getXLenVT();
14625 SDLoc DL(Op);
14626 SDValue Chain = Op->getOperand(0);
14627 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14628 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14629 return DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14630}
14631
14632SDValue RISCVTargetLowering::lowerSET_FPENV(SDValue Op,
14633 SelectionDAG &DAG) const {
14634 const MVT XLenVT = Subtarget.getXLenVT();
14635 SDLoc DL(Op);
14636 SDValue Chain = Op->getOperand(0);
14637 SDValue EnvValue = Op->getOperand(1);
14638 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14639
14640 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14641 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14642 EnvValue);
14643}
14644
14645SDValue RISCVTargetLowering::lowerRESET_FPENV(SDValue Op,
14646 SelectionDAG &DAG) const {
14647 const MVT XLenVT = Subtarget.getXLenVT();
14648 SDLoc DL(Op);
14649 SDValue Chain = Op->getOperand(0);
14650 SDValue EnvValue = DAG.getRegister(RISCV::X0, XLenVT);
14651 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14652
14653 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
14654 EnvValue);
14655}
14656
14659
14660SDValue RISCVTargetLowering::lowerGET_FPMODE(SDValue Op,
14661 SelectionDAG &DAG) const {
14662 const MVT XLenVT = Subtarget.getXLenVT();
14663 SDLoc DL(Op);
14664 SDValue Chain = Op->getOperand(0);
14665 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14666 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
14667 SDValue Result = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
14668 Chain = Result.getValue(1);
14669 return DAG.getMergeValues({Result, Chain}, DL);
14670}
14671
14672SDValue RISCVTargetLowering::lowerSET_FPMODE(SDValue Op,
14673 SelectionDAG &DAG) const {
14674 const MVT XLenVT = Subtarget.getXLenVT();
14675 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14676 SDLoc DL(Op);
14677 SDValue Chain = Op->getOperand(0);
14678 SDValue EnvValue = Op->getOperand(1);
14679 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14680 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14681
14682 EnvValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, EnvValue);
14683 EnvValue = DAG.getNode(ISD::AND, DL, XLenVT, EnvValue, ModeMask);
14684 Chain = DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14685 ModeMask);
14686 return DAG.getNode(RISCVISD::SET_CSR, DL, MVT::Other, Chain, SysRegNo,
14687 EnvValue);
14688}
14689
14690SDValue RISCVTargetLowering::lowerRESET_FPMODE(SDValue Op,
14691 SelectionDAG &DAG) const {
14692 const MVT XLenVT = Subtarget.getXLenVT();
14693 const uint64_t ModeMaskValue = Subtarget.is64Bit() ? ModeMask64 : ModeMask32;
14694 SDLoc DL(Op);
14695 SDValue Chain = Op->getOperand(0);
14696 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::fcsr, DL, XLenVT);
14697 SDValue ModeMask = DAG.getConstant(ModeMaskValue, DL, XLenVT);
14698
14699 return DAG.getNode(RISCVISD::CLEAR_CSR, DL, MVT::Other, Chain, SysRegNo,
14700 ModeMask);
14701}
14702
14703SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
14704 SelectionDAG &DAG) const {
14705 MachineFunction &MF = DAG.getMachineFunction();
14706
14707 bool isRISCV64 = Subtarget.is64Bit();
14708 EVT PtrVT = getPointerTy(DAG.getDataLayout());
14709
14710 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
14711 return DAG.getFrameIndex(FI, PtrVT);
14712}
14713
14714// Returns the opcode of the target-specific SDNode that implements the 32-bit
14715// form of the given Opcode.
14716static unsigned getRISCVWOpcode(unsigned Opcode) {
14717 switch (Opcode) {
14718 default:
14719 llvm_unreachable("Unexpected opcode");
14720 case ISD::SHL:
14721 return RISCVISD::SLLW;
14722 case ISD::SRA:
14723 return RISCVISD::SRAW;
14724 case ISD::SRL:
14725 return RISCVISD::SRLW;
14726 case ISD::SDIV:
14727 return RISCVISD::DIVW;
14728 case ISD::UDIV:
14729 return RISCVISD::DIVUW;
14730 case ISD::UREM:
14731 return RISCVISD::REMUW;
14732 case ISD::ROTL:
14733 return RISCVISD::ROLW;
14734 case ISD::ROTR:
14735 return RISCVISD::RORW;
14736 }
14737}
14738
14739 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
14740 // node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
14741 // otherwise be promoted to i64, making it difficult to select the
14742 // SLLW/DIVUW/.../*W later on because the fact that the operation was
14743 // originally of type i8/i16/i32 is lost.
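// For example, an i32 UDIV on RV64 becomes: any-extend both operands to i64,
// emit RISCVISD::DIVUW, then truncate the result back to i32.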
14744 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
14745 unsigned ExtOpc = ISD::ANY_EXTEND) {
14746 SDLoc DL(N);
14747 unsigned WOpcode = getRISCVWOpcode(N->getOpcode());
14748 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
14749 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
14750 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
14751 // ReplaceNodeResults requires we maintain the same type for the return value.
14752 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
14753}
14754
14755 // Converts the given 32-bit operation to an i64 operation with sign-extension
14756 // semantics to reduce the number of sign-extension instructions.
14757 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
14758 SDLoc DL(N);
14759 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
14760 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
14761 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
14762 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
14763 DAG.getValueType(MVT::i32));
14764 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
14765}
14766
14767 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
14768 SmallVectorImpl<SDValue> &Results,
14769 SelectionDAG &DAG) const {
14770 SDLoc DL(N);
14771 switch (N->getOpcode()) {
14772 default:
14773 llvm_unreachable("Don't know how to custom type legalize this operation!");
14774 case ISD::STRICT_FP_TO_SINT:
14775 case ISD::STRICT_FP_TO_UINT:
14776 case ISD::FP_TO_SINT:
14777 case ISD::FP_TO_UINT: {
14778 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14779 "Unexpected custom legalisation");
14780 bool IsStrict = N->isStrictFPOpcode();
14781 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
14782 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
14783 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
14784 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14785 TargetLowering::TypeSoftenFloat) {
14786 if (!isTypeLegal(Op0.getValueType()))
14787 return;
14788 if (IsStrict) {
14789 SDValue Chain = N->getOperand(0);
14790 // In absence of Zfh, promote f16 to f32, then convert.
14791 if (Op0.getValueType() == MVT::f16 &&
14792 !Subtarget.hasStdExtZfhOrZhinx()) {
14793 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
14794 {Chain, Op0});
14795 Chain = Op0.getValue(1);
14796 }
14797 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
14798 : RISCVISD::STRICT_FCVT_WU_RV64;
14799 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
14800 SDValue Res = DAG.getNode(
14801 Opc, DL, VTs, Chain, Op0,
14802 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14803 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14804 Results.push_back(Res.getValue(1));
14805 return;
14806 }
14807 // For bf16, or f16 in absence of Zfh, promote [b]f16 to f32 and then
14808 // convert.
14809 if ((Op0.getValueType() == MVT::f16 &&
14810 !Subtarget.hasStdExtZfhOrZhinx()) ||
14811 Op0.getValueType() == MVT::bf16)
14812 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14813
14814 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
14815 SDValue Res =
14816 DAG.getNode(Opc, DL, MVT::i64, Op0,
14817 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
14818 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14819 return;
14820 }
14821 // If the FP type needs to be softened, emit a library call using the 'si'
14822 // version. If we left it to default legalization we'd end up with 'di'. If
14823 // the FP type doesn't need to be softened just let generic type
14824 // legalization promote the result type.
14825 RTLIB::Libcall LC;
14826 if (IsSigned)
14827 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
14828 else
14829 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
14830 MakeLibCallOptions CallOptions;
14831 EVT OpVT = Op0.getValueType();
14832 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
14833 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
14834 SDValue Result;
14835 std::tie(Result, Chain) =
14836 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
14837 Results.push_back(Result);
14838 if (IsStrict)
14839 Results.push_back(Chain);
14840 break;
14841 }
14842 case ISD::LROUND: {
14843 SDValue Op0 = N->getOperand(0);
14844 EVT Op0VT = Op0.getValueType();
14845 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
14846 TargetLowering::TypeSoftenFloat) {
14847 if (!isTypeLegal(Op0VT))
14848 return;
14849
14850 // In absence of Zfh, promote f16 to f32, then convert.
14851 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
14852 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
14853
14854 SDValue Res =
14855 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
14856 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
14857 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
14858 return;
14859 }
14860 // If the FP type needs to be softened, emit a library call to lround. We'll
14861 // need to truncate the result. We assume any value that doesn't fit in i32
14862 // is allowed to return an unspecified value.
14863 RTLIB::Libcall LC =
14864 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
14865 MakeLibCallOptions CallOptions;
14866 EVT OpVT = Op0.getValueType();
14867 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
14868 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
14869 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
14870 Results.push_back(Result);
14871 break;
14872 }
14873 case ISD::READCYCLECOUNTER:
14874 case ISD::READSTEADYCOUNTER: {
14875 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
14876 "has custom type legalization on riscv32");
14877
14878 SDValue LoCounter, HiCounter;
14879 MVT XLenVT = Subtarget.getXLenVT();
14880 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
14881 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
14882 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
14883 } else {
14884 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
14885 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
14886 }
14887 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
14888 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
14889 N->getOperand(0), LoCounter, HiCounter);
14890
14891 Results.push_back(
14892 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
14893 Results.push_back(RCW.getValue(2));
14894 break;
14895 }
14896 case ISD::LOAD: {
14897 if (!ISD::isNON_EXTLoad(N))
14898 return;
14899
14900 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
14901 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
14902 LoadSDNode *Ld = cast<LoadSDNode>(N);
14903
14904 if (N->getValueType(0) == MVT::i64) {
14905 assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
14906 "Unexpected custom legalisation");
14907
14908 if (Ld->getAlign() < Subtarget.getZilsdAlign())
14909 return;
14910
14911 SDLoc DL(N);
14912 SDValue Result = DAG.getMemIntrinsicNode(
14913 RISCVISD::LD_RV32, DL,
14914 DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
14915 {Ld->getChain(), Ld->getBasePtr()}, MVT::i64, Ld->getMemOperand());
14916 SDValue Lo = Result.getValue(0);
14917 SDValue Hi = Result.getValue(1);
14918 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
14919 Results.append({Pair, Result.getValue(2)});
14920 return;
14921 }
14922
14923 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14924 "Unexpected custom legalisation");
14925
14926 SDLoc dl(N);
14927 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
14928 Ld->getBasePtr(), Ld->getMemoryVT(),
14929 Ld->getMemOperand());
14930 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
14931 Results.push_back(Res.getValue(1));
14932 return;
14933 }
14934 case ISD::MUL: {
14935 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
14936 unsigned XLen = Subtarget.getXLen();
14937 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
14938 if (Size > XLen) {
14939 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
14940 SDValue LHS = N->getOperand(0);
14941 SDValue RHS = N->getOperand(1);
14942 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
14943
14944 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
14945 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
14946 // We need exactly one side to be unsigned.
14947 if (LHSIsU == RHSIsU)
14948 return;
14949
14950 auto MakeMULPair = [&](SDValue S, SDValue U) {
14951 MVT XLenVT = Subtarget.getXLenVT();
14952 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
14953 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
14954 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
14955 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
14956 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
14957 };
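// MULHSU returns the high XLEN bits of a signed*unsigned product, so the
// (MUL, MULHSU) pair above reassembles the full 2*XLEN-bit result.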
14958
14959 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
14960 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
14961
14962 // The other operand should be signed, but still prefer MULH when
14963 // possible.
14964 if (RHSIsU && LHSIsS && !RHSIsS)
14965 Results.push_back(MakeMULPair(LHS, RHS));
14966 else if (LHSIsU && RHSIsS && !LHSIsS)
14967 Results.push_back(MakeMULPair(RHS, LHS));
14968
14969 return;
14970 }
14971 [[fallthrough]];
14972 }
14973 case ISD::ADD:
14974 case ISD::SUB:
14975 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14976 "Unexpected custom legalisation");
14977 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
14978 break;
14979 case ISD::SHL:
14980 case ISD::SRA:
14981 case ISD::SRL:
14982 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
14983 "Unexpected custom legalisation");
14984 if (N->getOperand(1).getOpcode() != ISD::Constant) {
14985 // If we can use a BSET instruction, allow default promotion to apply.
14986 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
14987 isOneConstant(N->getOperand(0)))
14988 break;
14989 Results.push_back(customLegalizeToWOp(N, DAG));
14990 break;
14991 }
14992
14993 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
14994 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
14995 // shift amount.
14996 if (N->getOpcode() == ISD::SHL) {
14997 SDLoc DL(N);
14998 SDValue NewOp0 =
14999 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15000 SDValue NewOp1 =
15001 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
15002 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
15003 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
15004 DAG.getValueType(MVT::i32));
15005 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
15006 }
15007
15008 break;
15009 case ISD::ROTL:
15010 case ISD::ROTR:
15011 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15012 "Unexpected custom legalisation");
15013 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
15014 Subtarget.hasVendorXTHeadBb()) &&
15015 "Unexpected custom legalization");
15016 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
15017 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
15018 return;
15019 Results.push_back(customLegalizeToWOp(N, DAG));
15020 break;
15021 case ISD::CTTZ:
15022 case ISD::CTTZ_ZERO_UNDEF:
15023 case ISD::CTLZ:
15024 case ISD::CTLZ_ZERO_UNDEF: {
15025 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15026 "Unexpected custom legalisation");
15027
15028 SDValue NewOp0 =
15029 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15030 bool IsCTZ =
15031 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
15032
15033 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
15034 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
15035 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15036 return;
15037 }
15038 case ISD::SDIV:
15039 case ISD::UDIV:
15040 case ISD::UREM: {
15041 MVT VT = N->getSimpleValueType(0);
15042 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
15043 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
15044 "Unexpected custom legalisation");
15045 // Don't promote division/remainder by constant since we should expand those
15046 // to multiply by magic constant.
15047 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
15048 if (N->getOperand(1).getOpcode() == ISD::Constant &&
15049 !isIntDivCheap(N->getValueType(0), Attr))
15050 return;
15051
15052 // If the input is i32, use ANY_EXTEND since the W instructions don't read
15053 // the upper 32 bits. For other types we need to sign or zero extend
15054 // based on the opcode.
15055 unsigned ExtOpc = ISD::ANY_EXTEND;
15056 if (VT != MVT::i32)
15057 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
15058 : ISD::ZERO_EXTEND;
15059
15060 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
15061 break;
15062 }
15063 case ISD::SADDO: {
15064 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15065 "Unexpected custom legalisation");
15066
15067 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
15068 // use the default legalization.
15069 if (!isa<ConstantSDNode>(N->getOperand(1)))
15070 return;
15071
15072 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
15073 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
15074 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
15075 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
15076 DAG.getValueType(MVT::i32));
15077
15078 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
15079
15080 // For an addition, the result should be less than one of the operands (LHS)
15081 // if and only if the other operand (RHS) is negative, otherwise there will
15082 // be overflow.
15083 // For a subtraction, the result should be less than one of the operands
15084 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
15085 // otherwise there will be overflow.
15086 EVT OType = N->getValueType(1);
15087 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
15088 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
15089
15090 SDValue Overflow =
15091 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
15092 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15093 Results.push_back(Overflow);
15094 return;
15095 }
15096 case ISD::UADDO:
15097 case ISD::USUBO: {
15098 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15099 "Unexpected custom legalisation");
15100 bool IsAdd = N->getOpcode() == ISD::UADDO;
15101 // Create an ADDW or SUBW.
15102 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15103 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15104 SDValue Res =
15105 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
15106 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
15107 DAG.getValueType(MVT::i32));
15108
15109 SDValue Overflow;
15110 if (IsAdd && isOneConstant(RHS)) {
15111 // Special case uaddo X, 1 overflowed if the addition result is 0.
15112 // The general case (X + C) < C is not necessarily beneficial. Although we
15113 // reduce the live range of X, we may introduce the materialization of
15114 // constant C, especially when the setcc result is used by a branch. We have
15115 // no compare-and-branch instructions that take a constant operand.
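// (uaddo X, 1 overflows exactly when X was all-ones, i.e. when the 32-bit sum
// wraps to 0, which is what the SETEQ against zero below checks.)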
15116 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
15117 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
15118 } else if (IsAdd && isAllOnesConstant(RHS)) {
15119 // Special case uaddo X, -1 overflowed if X != 0.
15120 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
15121 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
15122 } else {
15123 // Sign extend the LHS and perform an unsigned compare with the ADDW
15124 // result. Since the inputs are sign extended from i32, this is equivalent
15125 // to comparing the lower 32 bits.
15126 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
15127 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
15128 IsAdd ? ISD::SETULT : ISD::SETUGT);
15129 }
15130
15131 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15132 Results.push_back(Overflow);
15133 return;
15134 }
15135 case ISD::UADDSAT:
15136 case ISD::USUBSAT: {
15137 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15138 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
15139 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
15140 // promotion for UADDO/USUBO.
15141 Results.push_back(expandAddSubSat(N, DAG));
15142 return;
15143 }
15144 case ISD::SADDSAT:
15145 case ISD::SSUBSAT: {
15146 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15147 "Unexpected custom legalisation");
15148 Results.push_back(expandAddSubSat(N, DAG));
15149 return;
15150 }
15151 case ISD::ABS: {
15152 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
15153 "Unexpected custom legalisation");
15154
15155 if (Subtarget.hasStdExtP()) {
15156 SDValue Src =
15157 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15158 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
15159 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
15160 return;
15161 }
15162
15163 if (Subtarget.hasStdExtZbb()) {
15164 // Emit a special node that will be expanded to NEGW+MAX at isel.
15165 // This allows us to remember that the result is sign extended. Expanding
15166 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
15167 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
15168 N->getOperand(0));
15169 SDValue Abs = DAG.getNode(RISCVISD::NEGW_MAX, DL, MVT::i64, Src);
15170 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
15171 return;
15172 }
15173
15174 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
15175 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
15176
15177 // Freeze the source so we can increase its use count.
15178 Src = DAG.getFreeze(Src);
15179
15180 // Copy sign bit to all bits using the sraiw pattern.
15181 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
15182 DAG.getValueType(MVT::i32));
15183 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
15184 DAG.getConstant(31, DL, MVT::i64));
15185
15186 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
15187 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
15188
15189 // NOTE: The result is only required to be anyextended, but sext is
15190 // consistent with type legalization of sub.
15191 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
15192 DAG.getValueType(MVT::i32));
15193 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
15194 return;
15195 }
15196 case ISD::BITCAST: {
15197 EVT VT = N->getValueType(0);
15198 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
15199 SDValue Op0 = N->getOperand(0);
15200 EVT Op0VT = Op0.getValueType();
15201 MVT XLenVT = Subtarget.getXLenVT();
15202 if (VT == MVT::i16 &&
15203 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
15204 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
15205 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
15206 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
15207 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
15208 Subtarget.hasStdExtFOrZfinx()) {
15209 SDValue FPConv =
15210 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
15211 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
15212 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
15213 Subtarget.hasStdExtDOrZdinx()) {
15214 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
15215 DAG.getVTList(MVT::i32, MVT::i32), Op0);
15216 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
15217 NewReg.getValue(0), NewReg.getValue(1));
15218 Results.push_back(RetReg);
15219 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
15220 isTypeLegal(Op0VT)) {
15221 // Custom-legalize bitcasts from fixed-length vector types to illegal
15222 // scalar types in order to improve codegen. Bitcast the vector to a
15223 // one-element vector type whose element type is the same as the result
15224 // type, and extract the first element.
15225 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
15226 if (isTypeLegal(BVT)) {
15227 SDValue BVec = DAG.getBitcast(BVT, Op0);
15228 Results.push_back(DAG.getExtractVectorElt(DL, VT, BVec, 0));
15229 }
15230 }
15231 break;
15232 }
15233 case ISD::BITREVERSE: {
15234 assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
15235 "Unexpected custom legalisation");
15236 MVT XLenVT = Subtarget.getXLenVT();
15237 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
15238 SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
15239 // ReplaceNodeResults requires we maintain the same type for the return
15240 // value.
15241 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
15242 break;
15243 }
15244 case RISCVISD::BREV8:
15245 case RISCVISD::ORC_B: {
15246 MVT VT = N->getSimpleValueType(0);
15247 MVT XLenVT = Subtarget.getXLenVT();
15248 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
15249 "Unexpected custom legalisation");
15250 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
15251 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
15252 "Unexpected extension");
15253 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
15254 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
15255 // ReplaceNodeResults requires we maintain the same type for the return
15256 // value.
15257 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
15258 break;
15259 }
15260 case RISCVISD::PASUB:
15261 case RISCVISD::PASUBU: {
15262 MVT VT = N->getSimpleValueType(0);
15263 SDValue Op0 = N->getOperand(0);
15264 SDValue Op1 = N->getOperand(1);
15265 assert(VT == MVT::v2i16 || VT == MVT::v4i8);
15266 MVT NewVT = MVT::v4i16;
15267 if (VT == MVT::v4i8)
15268 NewVT = MVT::v8i8;
15269 SDValue Undef = DAG.getUNDEF(VT);
15270 Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, {Op0, Undef});
15271 Op1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, {Op1, Undef});
15272 Results.push_back(DAG.getNode(N->getOpcode(), DL, NewVT, {Op0, Op1}));
15273 return;
15274 }
15275 }
case ISD::EXTRACT_VECTOR_ELT: {
15276 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
15277 // type is illegal (currently only vXi64 RV32).
15278 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
15279 // transferred to the destination register. We issue two of these from the
15280 // upper- and lower- halves of the SEW-bit vector element, slid down to the
15281 // first element.
15282 SDValue Vec = N->getOperand(0);
15283 SDValue Idx = N->getOperand(1);
15284
15285 // The vector type hasn't been legalized yet so we can't issue target
15286 // specific nodes if it needs legalization.
15287 // FIXME: We would manually legalize if it's important.
15288 if (!isTypeLegal(Vec.getValueType()))
15289 return;
15290
15291 MVT VecVT = Vec.getSimpleValueType();
15292
15293 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
15294 VecVT.getVectorElementType() == MVT::i64 &&
15295 "Unexpected EXTRACT_VECTOR_ELT legalization");
15296
15297 // If this is a fixed vector, we need to convert it to a scalable vector.
15298 MVT ContainerVT = VecVT;
15299 if (VecVT.isFixedLengthVector()) {
15300 ContainerVT = getContainerForFixedLengthVector(VecVT);
15301 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
15302 }
15303
15304 MVT XLenVT = Subtarget.getXLenVT();
15305
15306 // Use a VL of 1 to avoid processing more elements than we need.
15307 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
15308
15309 // Unless the index is known to be 0, we must slide the vector down to get
15310 // the desired element into index 0.
15311 if (!isNullConstant(Idx)) {
15312 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
15313 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
15314 }
15315
15316 // Extract the lower XLEN bits of the correct vector element.
15317 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15318
15319 // To extract the upper XLEN bits of the vector element, shift the first
15320 // element right by 32 bits and re-extract the lower XLEN bits.
15321 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
15322 DAG.getUNDEF(ContainerVT),
15323 DAG.getConstant(32, DL, XLenVT), VL);
15324 SDValue LShr32 =
15325 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
15326 DAG.getUNDEF(ContainerVT), Mask, VL);
15327
15328 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15329
15330 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15331 break;
15332 }
15333 case ISD::INTRINSIC_WO_CHAIN: {
15334 unsigned IntNo = N->getConstantOperandVal(0);
15335 switch (IntNo) {
15336 default:
15338 "Don't know how to custom type legalize this intrinsic!");
15339 case Intrinsic::experimental_get_vector_length: {
15340 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
15341 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15342 return;
15343 }
15344 case Intrinsic::experimental_cttz_elts: {
15345 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
15346 Results.push_back(
15347 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
15348 return;
15349 }
15350 case Intrinsic::riscv_orc_b:
15351 case Intrinsic::riscv_brev8:
15352 case Intrinsic::riscv_sha256sig0:
15353 case Intrinsic::riscv_sha256sig1:
15354 case Intrinsic::riscv_sha256sum0:
15355 case Intrinsic::riscv_sha256sum1:
15356 case Intrinsic::riscv_sm3p0:
15357 case Intrinsic::riscv_sm3p1: {
15358 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15359 return;
15360 unsigned Opc;
15361 switch (IntNo) {
15362 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
15363 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
15364 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
15365 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
15366 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
15367 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
15368 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
15369 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
15370 }
15371
15372 SDValue NewOp =
15373 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15374 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
15375 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15376 return;
15377 }
15378 case Intrinsic::riscv_sm4ks:
15379 case Intrinsic::riscv_sm4ed: {
15380 unsigned Opc =
15381 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
15382 SDValue NewOp0 =
15383 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15384 SDValue NewOp1 =
15385 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15386 SDValue Res =
15387 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
15388 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15389 return;
15390 }
15391 case Intrinsic::riscv_mopr: {
15392 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15393 return;
15394 SDValue NewOp =
15395 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15396 SDValue Res = DAG.getNode(
15397 RISCVISD::MOP_R, DL, MVT::i64, NewOp,
15398 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
15399 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15400 return;
15401 }
15402 case Intrinsic::riscv_moprr: {
15403 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15404 return;
15405 SDValue NewOp0 =
15406 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15407 SDValue NewOp1 =
15408 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15409 SDValue Res = DAG.getNode(
15410 RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
15411 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
15412 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15413 return;
15414 }
15415 case Intrinsic::riscv_clmul: {
15416 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15417 return;
15418
15419 SDValue NewOp0 =
15420 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15421 SDValue NewOp1 =
15422 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15423 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
15424 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15425 return;
15426 }
15427 case Intrinsic::riscv_clmulh:
15428 case Intrinsic::riscv_clmulr: {
15429 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
15430 return;
15431
15432 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
15433 // to the full 128-bit clmul result of multiplying two xlen values.
15434 // Perform clmulr or clmulh on the shifted values. Finally, extract the
15435 // upper 32 bits.
15436 //
15437 // The alternative is to mask the inputs to 32 bits and use clmul, but
15438 // that requires two shifts to mask each input without zext.w.
15439 // FIXME: If the inputs are known zero extended or could be freely
15440 // zero extended, the mask form would be better.
15441 SDValue NewOp0 =
15442 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
15443 SDValue NewOp1 =
15444 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
15445 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
15446 DAG.getConstant(32, DL, MVT::i64));
15447 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
15448 DAG.getConstant(32, DL, MVT::i64));
15449 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
15450 : RISCVISD::CLMULR;
15451 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
15452 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
15453 DAG.getConstant(32, DL, MVT::i64));
15454 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
15455 return;
15456 }
15457 case Intrinsic::riscv_vmv_x_s: {
15458 EVT VT = N->getValueType(0);
15459 MVT XLenVT = Subtarget.getXLenVT();
15460 if (VT.bitsLT(XLenVT)) {
15461 // Simple case just extract using vmv.x.s and truncate.
15462 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
15463 Subtarget.getXLenVT(), N->getOperand(1));
15464 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
15465 return;
15466 }
15467
15468 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
15469 "Unexpected custom legalization");
15470
15471 // We need to do the move in two steps.
15472 SDValue Vec = N->getOperand(1);
15473 MVT VecVT = Vec.getSimpleValueType();
15474
15475 // First extract the lower XLEN bits of the element.
15476 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
15477
15478 // To extract the upper XLEN bits of the vector element, shift the first
15479 // element right by 32 bits and re-extract the lower XLEN bits.
15480 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
15481
15482 SDValue ThirtyTwoV =
15483 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
15484 DAG.getConstant(32, DL, XLenVT), VL);
15485 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
15486 DAG.getUNDEF(VecVT), Mask, VL);
15487 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
15488
15489 Results.push_back(
15490 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
15491 break;
15492 }
15493 }
15494 break;
15495 }
15496 case ISD::VECREDUCE_ADD:
15497 case ISD::VECREDUCE_AND:
15498 case ISD::VECREDUCE_OR:
15499 case ISD::VECREDUCE_XOR:
15500 case ISD::VECREDUCE_SMAX:
15501 case ISD::VECREDUCE_UMAX:
15502 case ISD::VECREDUCE_SMIN:
15503 case ISD::VECREDUCE_UMIN:
15504 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
15505 Results.push_back(V);
15506 break;
15507 case ISD::VP_REDUCE_ADD:
15508 case ISD::VP_REDUCE_AND:
15509 case ISD::VP_REDUCE_OR:
15510 case ISD::VP_REDUCE_XOR:
15511 case ISD::VP_REDUCE_SMAX:
15512 case ISD::VP_REDUCE_UMAX:
15513 case ISD::VP_REDUCE_SMIN:
15514 case ISD::VP_REDUCE_UMIN:
15515 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
15516 Results.push_back(V);
15517 break;
15518 case ISD::GET_ROUNDING: {
15519 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
15520 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
15521 Results.push_back(Res.getValue(0));
15522 Results.push_back(Res.getValue(1));
15523 break;
15524 }
15525 }
15526}
15527
15528/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
15529/// which corresponds to it.
15530static unsigned getVecReduceOpcode(unsigned Opc) {
15531 switch (Opc) {
15532 default:
15533 llvm_unreachable("Unhandled binary to transform reduction");
15534 case ISD::ADD:
15535 return ISD::VECREDUCE_ADD;
15536 case ISD::UMAX:
15537 return ISD::VECREDUCE_UMAX;
15538 case ISD::SMAX:
15539 return ISD::VECREDUCE_SMAX;
15540 case ISD::UMIN:
15541 return ISD::VECREDUCE_UMIN;
15542 case ISD::SMIN:
15543 return ISD::VECREDUCE_SMIN;
15544 case ISD::AND:
15545 return ISD::VECREDUCE_AND;
15546 case ISD::OR:
15547 return ISD::VECREDUCE_OR;
15548 case ISD::XOR:
15549 return ISD::VECREDUCE_XOR;
15550 case ISD::FADD:
15551 // Note: This is the associative form of the generic reduction opcode.
15552 return ISD::VECREDUCE_FADD;
15553 case ISD::FMAXNUM:
15554 return ISD::VECREDUCE_FMAX;
15555 case ISD::FMINNUM:
15556 return ISD::VECREDUCE_FMIN;
15557 }
15558}
15559
15560/// Perform two related transforms whose purpose is to incrementally recognize
15561/// an explode_vector followed by scalar reduction as a vector reduction node.
15562/// This exists to recover from a deficiency in SLP which can't handle
15563/// forests with multiple roots sharing common nodes. In some cases, one
15564/// of the trees will be vectorized, and the other will remain (unprofitably)
15565/// scalarized.
15566static SDValue
15567 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
15568 const RISCVSubtarget &Subtarget) {
15569
15570 // These transforms need to run before all integer types have been legalized
15571 // to i64 (so that the vector element type matches the add type), and while
15572 // it's safe to introduce odd sized vector types.
15573 if (DAG.NewNodesMustHaveLegalTypes)
15574 return SDValue();
15575
15576 // Without V, this transform isn't useful. We could form the (illegal)
15577 // operations and let them be scalarized again, but there's really no point.
15578 if (!Subtarget.hasVInstructions())
15579 return SDValue();
15580
15581 const SDLoc DL(N);
15582 const EVT VT = N->getValueType(0);
15583 const unsigned Opc = N->getOpcode();
15584
15585 if (!VT.isInteger()) {
15586 switch (Opc) {
15587 default:
15588 return SDValue();
15589 case ISD::FADD:
15590 // For FADD, we only handle the case with reassociation allowed. We
15591 // could handle strict reduction order, but at the moment, there's no
15592 // known reason to, and the complexity isn't worth it.
15593 if (!N->getFlags().hasAllowReassociation())
15594 return SDValue();
15595 break;
15596 case ISD::FMAXNUM:
15597 case ISD::FMINNUM:
15598 break;
15599 }
15600 }
15601
15602 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
15603 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
15604 "Inconsistent mappings");
15605 SDValue LHS = N->getOperand(0);
15606 SDValue RHS = N->getOperand(1);
15607
15608 if (!LHS.hasOneUse() || !RHS.hasOneUse())
15609 return SDValue();
15610
15611 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15612 std::swap(LHS, RHS);
15613
15614 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15615 !isa<ConstantSDNode>(RHS.getOperand(1)))
15616 return SDValue();
15617
15618 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
15619 SDValue SrcVec = RHS.getOperand(0);
15620 EVT SrcVecVT = SrcVec.getValueType();
15621 assert(SrcVecVT.getVectorElementType() == VT);
15622 if (SrcVecVT.isScalableVector())
15623 return SDValue();
15624
15625 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
15626 return SDValue();
15627
15628 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
15629 // reduce_op (extract_subvector [2 x VT] from V). This will form the
15630 // root of our reduction tree. TODO: We could extend this to any two
15631 // adjacent aligned constant indices if desired.
15632 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15633 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
15634 uint64_t LHSIdx =
15635 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
15636 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
15637 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
15638 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15639 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
15640 }
15641 }
15642
15643 // Match (binop (reduce (extract_subvector V, 0)),
15644 //       (extract_vector_elt V, sizeof(SubVec)))
15645 // into a reduction of one more element from the original vector V.
15646 if (LHS.getOpcode() != ReduceOpc)
15647 return SDValue();
15648
15649 SDValue ReduceVec = LHS.getOperand(0);
15650 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
15651 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
15652 isNullConstant(ReduceVec.getOperand(1)) &&
15653 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
15654 // For illegal types (e.g. 3xi32), most will be combined again into a
15655 // wider (hopefully legal) type. If this is a terminal state, we are
15656 // relying on type legalization here to produce something reasonable
15657 // and this lowering quality could probably be improved. (TODO)
15658 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
15659 SDValue Vec = DAG.getExtractSubvector(DL, ReduceVT, SrcVec, 0);
15660 return DAG.getNode(ReduceOpc, DL, VT, Vec,
15661 ReduceVec->getFlags() & N->getFlags());
15662 }
15663
15664 return SDValue();
15665}
15666
15667
15668// Try to fold (<bop> x, (reduction.<bop> vec, start))
15669 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
15670 const RISCVSubtarget &Subtarget) {
15671 auto BinOpToRVVReduce = [](unsigned Opc) {
15672 switch (Opc) {
15673 default:
15674 llvm_unreachable("Unhandled binary to transform reduction");
15675 case ISD::ADD:
15676 return RISCVISD::VECREDUCE_ADD_VL;
15677 case ISD::UMAX:
15678 return RISCVISD::VECREDUCE_UMAX_VL;
15679 case ISD::SMAX:
15680 return RISCVISD::VECREDUCE_SMAX_VL;
15681 case ISD::UMIN:
15682 return RISCVISD::VECREDUCE_UMIN_VL;
15683 case ISD::SMIN:
15684 return RISCVISD::VECREDUCE_SMIN_VL;
15685 case ISD::AND:
15686 return RISCVISD::VECREDUCE_AND_VL;
15687 case ISD::OR:
15688 return RISCVISD::VECREDUCE_OR_VL;
15689 case ISD::XOR:
15690 return RISCVISD::VECREDUCE_XOR_VL;
15691 case ISD::FADD:
15692 return RISCVISD::VECREDUCE_FADD_VL;
15693 case ISD::FMAXNUM:
15694 return RISCVISD::VECREDUCE_FMAX_VL;
15695 case ISD::FMINNUM:
15696 return RISCVISD::VECREDUCE_FMIN_VL;
15697 }
15698 };
15699
15700 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
15701 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15702 isNullConstant(V.getOperand(1)) &&
15703 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
15704 };
15705
15706 unsigned Opc = N->getOpcode();
15707 unsigned ReduceIdx;
15708 if (IsReduction(N->getOperand(0), Opc))
15709 ReduceIdx = 0;
15710 else if (IsReduction(N->getOperand(1), Opc))
15711 ReduceIdx = 1;
15712 else
15713 return SDValue();
15714
15715 // Skip if FADD disallows reassociation, since this combine requires it.
15716 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
15717 return SDValue();
15718
15719 SDValue Extract = N->getOperand(ReduceIdx);
15720 SDValue Reduce = Extract.getOperand(0);
15721 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
15722 return SDValue();
15723
15724 SDValue ScalarV = Reduce.getOperand(2);
15725 EVT ScalarVT = ScalarV.getValueType();
15726 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
15727 ScalarV.getOperand(0)->isUndef() &&
15728 isNullConstant(ScalarV.getOperand(2)))
15729 ScalarV = ScalarV.getOperand(1);
15730
15731 // Make sure that ScalarV is a splat with VL=1.
15732 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
15733 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
15734 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
15735 return SDValue();
15736
15737 if (!isNonZeroAVL(ScalarV.getOperand(2)))
15738 return SDValue();
15739
15740 // Check the scalar of ScalarV is neutral element
15741 // TODO: Deal with value other than neutral element.
15742 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
15743 0))
15744 return SDValue();
15745
15746 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
15747 // FIXME: We might be able to improve this if operand 0 is undef.
15748 if (!isNonZeroAVL(Reduce.getOperand(5)))
15749 return SDValue();
15750
15751 SDValue NewStart = N->getOperand(1 - ReduceIdx);
15752
15753 SDLoc DL(N);
15754 SDValue NewScalarV =
15755 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
15756 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
15757
15758 // If we looked through an INSERT_SUBVECTOR we need to restore it.
15759 if (ScalarVT != ScalarV.getValueType())
15760 NewScalarV =
15761 DAG.getInsertSubvector(DL, DAG.getUNDEF(ScalarVT), NewScalarV, 0);
15762
15763 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
15764 NewScalarV, Reduce.getOperand(3),
15765 Reduce.getOperand(4), Reduce.getOperand(5)};
15766 SDValue NewReduce =
15767 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
15768 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
15769 Extract.getOperand(1));
15770}
15771
15772// Optimize (add (shl x, c0), (shl y, c1)) ->
15773 // (SLLI (SH*ADD x, y), c0), if c1-c0 equals [1|2|3].
15774// or
15775// (SLLI (QC.SHLADD x, y, c1 - c0), c0), if 4 <= (c1-c0) <=31.
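// Worked example: with c0 = 1 and c1 = 3, Diff = 2 and Bits = 1, so
//   (add (shl x, 1), (shl y, 3)) -> (SLLI (SH2ADD y, x), 1),
// i.e. ((y << 2) + x) << 1, which equals (x << 1) + (y << 3).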
15776 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
15777 const RISCVSubtarget &Subtarget) {
15778 // Perform this optimization only in the zba/xandesperf/xqciac/xtheadba
15779 // extension.
15780 if (!Subtarget.hasShlAdd(3))
15781 return SDValue();
15782
15783 // Skip for vector types and larger types.
15784 EVT VT = N->getValueType(0);
15785 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15786 return SDValue();
15787
15788 // The two operand nodes must be SHL and have no other use.
15789 SDValue N0 = N->getOperand(0);
15790 SDValue N1 = N->getOperand(1);
15791 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
15792 !N0->hasOneUse() || !N1->hasOneUse())
15793 return SDValue();
15794
15795 // Check c0 and c1.
15796 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15797 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
15798 if (!N0C || !N1C)
15799 return SDValue();
15800 int64_t C0 = N0C->getSExtValue();
15801 int64_t C1 = N1C->getSExtValue();
15802 if (C0 <= 0 || C1 <= 0)
15803 return SDValue();
15804
15805 int64_t Diff = std::abs(C0 - C1);
15806 if (!Subtarget.hasShlAdd(Diff))
15807 return SDValue();
15808
15809 // Build nodes.
15810 SDLoc DL(N);
15811 int64_t Bits = std::min(C0, C1);
15812 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
15813 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
15814 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
15815 DAG.getTargetConstant(Diff, DL, VT), NS);
15816 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
15817}
15818
15819// Check if this SDValue is an add immediate that is fed by a shift of 1, 2,
15820// or 3.
15821 static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other,
15822 SelectionDAG &DAG) {
15823 using namespace llvm::SDPatternMatch;
15824
15825 // Looking for a reg-reg add and not an addi.
15826 if (isa<ConstantSDNode>(N->getOperand(1)))
15827 return SDValue();
15828
15829 // Based on testing it seems that performance degrades if the ADDI has
15830 // more than 2 uses.
15831 if (AddI->use_size() > 2)
15832 return SDValue();
15833
15834 APInt AddVal;
15835 SDValue SHLVal;
15836 if (!sd_match(AddI, m_Add(m_Value(SHLVal), m_ConstInt(AddVal))))
15837 return SDValue();
15838
15839 APInt VShift;
15840 if (!sd_match(SHLVal, m_OneUse(m_Shl(m_Value(), m_ConstInt(VShift)))))
15841 return SDValue();
15842
15843 if (VShift.slt(1) || VShift.sgt(3))
15844 return SDValue();
15845
15846 SDLoc DL(N);
15847 EVT VT = N->getValueType(0);
15848 // The shift must be positive but the add can be signed.
15849 uint64_t ShlConst = VShift.getZExtValue();
15850 int64_t AddConst = AddVal.getSExtValue();
15851
15852 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, SHLVal->getOperand(0),
15853 DAG.getTargetConstant(ShlConst, DL, VT), Other);
15854 return DAG.getNode(ISD::ADD, DL, VT, SHADD,
15855 DAG.getSignedConstant(AddConst, DL, VT));
15856}
15857
15858// Optimize (add (add (shl x, c0), c1), y) ->
15859 // (ADDI (SH*ADD y, x), c1), if c0 equals [1|2|3].
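// Worked example: with c0 = 2 and c1 = 40,
//   (add (add (shl x, 2), 40), y) -> (ADDI (SH2ADD y, x), 40),
// i.e. ((x << 2) + y) + 40.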
15860 static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG,
15861 const RISCVSubtarget &Subtarget) {
15862 // Perform this optimization only in the zba extension.
15863 if (!ReassocShlAddiAdd || !Subtarget.hasShlAdd(3))
15864 return SDValue();
15865
15866 // Skip for vector types and larger types.
15867 EVT VT = N->getValueType(0);
15868 if (VT != Subtarget.getXLenVT())
15869 return SDValue();
15870
15871 SDValue AddI = N->getOperand(0);
15872 SDValue Other = N->getOperand(1);
15873 if (SDValue V = combineShlAddIAddImpl(N, AddI, Other, DAG))
15874 return V;
15875 if (SDValue V = combineShlAddIAddImpl(N, Other, AddI, DAG))
15876 return V;
15877 return SDValue();
15878}
15879
15880// Combine a constant select operand into its use:
15881//
15882// (and (select cond, -1, c), x)
15883// -> (select cond, x, (and x, c)) [AllOnes=1]
15884// (or (select cond, 0, c), x)
15885// -> (select cond, x, (or x, c)) [AllOnes=0]
15886// (xor (select cond, 0, c), x)
15887// -> (select cond, x, (xor x, c)) [AllOnes=0]
15888// (add (select cond, 0, c), x)
15889// -> (select cond, x, (add x, c)) [AllOnes=0]
15890// (sub x, (select cond, 0, c))
15891// -> (select cond, x, (sub x, c)) [AllOnes=0]
15892 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
15893 SelectionDAG &DAG, bool AllOnes,
15894 const RISCVSubtarget &Subtarget) {
15895 EVT VT = N->getValueType(0);
15896
15897 // Skip vectors.
15898 if (VT.isVector())
15899 return SDValue();
15900
15901 if (!Subtarget.hasConditionalMoveFusion()) {
15902 // (select cond, x, (and x, c)) has custom lowering with Zicond.
15903 if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
15904 return SDValue();
15905
15906 // Maybe harmful when the condition code has multiple uses.
15907 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
15908 return SDValue();
15909
15910 // Maybe harmful when VT is wider than XLen.
15911 if (VT.getSizeInBits() > Subtarget.getXLen())
15912 return SDValue();
15913 }
15914
15915 if ((Slct.getOpcode() != ISD::SELECT &&
15916 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
15917 !Slct.hasOneUse())
15918 return SDValue();
15919
15920 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
15921 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
15922 };
15923
15924 bool SwapSelectOps;
15925 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
15926 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
15927 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
15928 SDValue NonConstantVal;
15929 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
15930 SwapSelectOps = false;
15931 NonConstantVal = FalseVal;
15932 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
15933 SwapSelectOps = true;
15934 NonConstantVal = TrueVal;
15935 } else
15936 return SDValue();
15937
15938 // Slct is now known to be the desired identity constant when CC is true.
15939 TrueVal = OtherOp;
15940 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
15941 // Unless SwapSelectOps says the condition should be false.
15942 if (SwapSelectOps)
15943 std::swap(TrueVal, FalseVal);
15944
15945 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
15946 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
15947 {Slct.getOperand(0), Slct.getOperand(1),
15948 Slct.getOperand(2), TrueVal, FalseVal});
15949
15950 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
15951 {Slct.getOperand(0), TrueVal, FalseVal});
15952}
15953
15954// Attempt combineSelectAndUse on each operand of a commutative operator N.
15955 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
15956 bool AllOnes,
15957 const RISCVSubtarget &Subtarget) {
15958 SDValue N0 = N->getOperand(0);
15959 SDValue N1 = N->getOperand(1);
15960 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
15961 return Result;
15962 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
15963 return Result;
15964 return SDValue();
15965}
15966
15967// Transform (add (mul x, c0), c1) ->
15968// (add (mul (add x, c1/c0), c0), c1%c0).
15969// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
15970// that should be excluded is when c0*(c1/c0) is simm12, which will lead
15971// to an infinite loop in DAGCombine if transformed.
15972// Or transform (add (mul x, c0), c1) ->
15973// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
15974// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
15975// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
15976// lead to an infinite loop in DAGCombine if transformed.
15977// Or transform (add (mul x, c0), c1) ->
15978// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
15979// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
15980// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
15981// lead to an infinite loop in DAGCombine if transformed.
15982// Or transform (add (mul x, c0), c1) ->
15983// (mul (add x, c1/c0), c0).
15984// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
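// Worked example for the first form: with c0 = 100 and c1 = 4123, c1 is not
// simm12, but c1/c0 = 41 and c1%c0 = 23 are, and c0*(c1/c0) = 4100 is not, so
//   (add (mul x, 100), 4123) -> (add (mul (add x, 41), 100), 23).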
15985 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
15986 const RISCVSubtarget &Subtarget) {
15987 // Skip for vector types and larger types.
15988 EVT VT = N->getValueType(0);
15989 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
15990 return SDValue();
15991 // The first operand node must be a MUL and has no other use.
15992 SDValue N0 = N->getOperand(0);
15993 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
15994 return SDValue();
15995 // Check if c0 and c1 match above conditions.
15996 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
15997 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15998 if (!N0C || !N1C)
15999 return SDValue();
16000 // If N0C has multiple uses it's possible one of the cases in
16001 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
16002 // in an infinite loop.
16003 if (!N0C->hasOneUse())
16004 return SDValue();
16005 int64_t C0 = N0C->getSExtValue();
16006 int64_t C1 = N1C->getSExtValue();
16007 int64_t CA, CB;
16008 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
16009 return SDValue();
16010 // Search for proper CA (non-zero) and CB that both are simm12.
16011 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
16012 !isInt<12>(C0 * (C1 / C0))) {
16013 CA = C1 / C0;
16014 CB = C1 % C0;
16015 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
16016 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
16017 CA = C1 / C0 + 1;
16018 CB = C1 % C0 - C0;
16019 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
16020 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
16021 CA = C1 / C0 - 1;
16022 CB = C1 % C0 + C0;
16023 } else
16024 return SDValue();
16025 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
16026 SDLoc DL(N);
16027 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
16028 DAG.getSignedConstant(CA, DL, VT));
16029 SDValue New1 =
16030 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
16031 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
16032}
16033
16034// add (zext, zext) -> zext (add (zext, zext))
16035// sub (zext, zext) -> sext (sub (zext, zext))
16036// mul (zext, zext) -> zext (mul (zext, zext))
16037// sdiv (zext, zext) -> zext (sdiv (zext, zext))
16038// udiv (zext, zext) -> zext (udiv (zext, zext))
16039// srem (zext, zext) -> zext (srem (zext, zext))
16040// urem (zext, zext) -> zext (urem (zext, zext))
16041//
16042 // where the sum of the extend widths match, and the range of the bin op
16043// fits inside the width of the narrower bin op. (For profitability on rvv, we
16044// use a power of two for both inner and outer extend.)
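// For example, (add (zext nxv1i8 %a to nxv1i64), (zext nxv1i8 %b to nxv1i64))
// becomes (zext (add (zext %a to nxv1i32), (zext %b to nxv1i32)) to nxv1i64),
// so the add is performed at half the original element width.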
16045 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
16046
16047 EVT VT = N->getValueType(0);
16048 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
16049 return SDValue();
16050
16051 SDValue N0 = N->getOperand(0);
16052 SDValue N1 = N->getOperand(1);
16053 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
16054 return SDValue();
16055 if (!N0.hasOneUse() || !N1.hasOneUse())
16056 return SDValue();
16057
16058 SDValue Src0 = N0.getOperand(0);
16059 SDValue Src1 = N1.getOperand(0);
16060 EVT SrcVT = Src0.getValueType();
16061 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
16062 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
16063 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
16064 return SDValue();
16065
16066 LLVMContext &C = *DAG.getContext();
16067 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
16068 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
16069
16070 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
16071 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
16072
16073 // Src0 and Src1 are zero extended, so they're always positive if signed.
16074 //
16075 // sub can produce a negative from two positive operands, so it needs sign
16076 // extended. Other nodes produce a positive from two positive operands, so
16077 // zero extend instead.
16078 unsigned OuterExtend =
16079 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
16080
16081 return DAG.getNode(
16082 OuterExtend, SDLoc(N), VT,
16083 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
16084}
16085
16086// Try to turn (add (xor bool, 1) -1) into (neg bool).
16087 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
16088 SDValue N0 = N->getOperand(0);
16089 SDValue N1 = N->getOperand(1);
16090 EVT VT = N->getValueType(0);
16091 SDLoc DL(N);
16092
16093 // RHS should be -1.
16094 if (!isAllOnesConstant(N1))
16095 return SDValue();
16096
16097 // Look for (xor X, 1).
16098 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
16099 return SDValue();
16100
16101 // First xor input should be 0 or 1.
16102 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
16103 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
16104 return SDValue();
16105
16106 // Emit a negate of the setcc.
16107 return DAG.getNegative(N0.getOperand(0), DL, VT);
16108}
16109
16110 static SDValue performADDCombine(SDNode *N,
16111 TargetLowering::DAGCombinerInfo &DCI,
16112 const RISCVSubtarget &Subtarget) {
16113 SelectionDAG &DAG = DCI.DAG;
16114 if (SDValue V = combineAddOfBooleanXor(N, DAG))
16115 return V;
16116 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
16117 return V;
16118 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) {
16119 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
16120 return V;
16121 if (SDValue V = combineShlAddIAdd(N, DAG, Subtarget))
16122 return V;
16123 }
16124 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16125 return V;
16126 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16127 return V;
16128 if (SDValue V = combineBinOpOfZExt(N, DAG))
16129 return V;
16130
16131 // fold (add (select lhs, rhs, cc, 0, y), x) ->
16132 // (select lhs, rhs, cc, x, (add x, y))
16133 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16134}
16135
16136// Try to turn a sub boolean RHS and constant LHS into an addi.
16137 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
16138 SDValue N0 = N->getOperand(0);
16139 SDValue N1 = N->getOperand(1);
16140 EVT VT = N->getValueType(0);
16141 SDLoc DL(N);
16142
16143 // Require a constant LHS.
16144 auto *N0C = dyn_cast<ConstantSDNode>(N0);
16145 if (!N0C)
16146 return SDValue();
16147
16148 // All our optimizations involve subtracting 1 from the immediate and forming
16149 // an ADDI. Make sure the new immediate is valid for an ADDI.
16150 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
16151 if (!ImmValMinus1.isSignedIntN(12))
16152 return SDValue();
16153
16154 SDValue NewLHS;
16155 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
16156 // (sub constant, (setcc x, y, eq/neq)) ->
16157 // (add (setcc x, y, neq/eq), constant - 1)
16158 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
16159 EVT SetCCOpVT = N1.getOperand(0).getValueType();
16160 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
16161 return SDValue();
16162 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16163 NewLHS =
16164 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
16165 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
16166 N1.getOperand(0).getOpcode() == ISD::SETCC) {
16167 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
16168 // Since setcc returns a bool the xor is equivalent to 1-setcc.
16169 NewLHS = N1.getOperand(0);
16170 } else
16171 return SDValue();
16172
16173 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
16174 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
16175}
16176
16177// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
16178// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
16179// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
16180// valid with Y=3, while 0b0000_1000_0000_0100 is not.
16181 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
16182 const RISCVSubtarget &Subtarget) {
16183 if (!Subtarget.hasStdExtZbb())
16184 return SDValue();
16185
16186 EVT VT = N->getValueType(0);
16187
16188 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
16189 return SDValue();
16190
16191 SDValue N0 = N->getOperand(0);
16192 SDValue N1 = N->getOperand(1);
16193
16194 if (N0->getOpcode() != ISD::SHL)
16195 return SDValue();
16196
16197 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
16198 if (!ShAmtCLeft)
16199 return SDValue();
16200 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
16201
16202 if (ShiftedAmount >= 8)
16203 return SDValue();
16204
16205 SDValue LeftShiftOperand = N0->getOperand(0);
16206 SDValue RightShiftOperand = N1;
16207
16208 if (ShiftedAmount != 0) { // Right operand must be a right shift.
16209 if (N1->getOpcode() != ISD::SRL)
16210 return SDValue();
16211 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
16212 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
16213 return SDValue();
16214 RightShiftOperand = N1.getOperand(0);
16215 }
16216
16217 // At least one shift should have a single use.
16218 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
16219 return SDValue();
16220
16221 if (LeftShiftOperand != RightShiftOperand)
16222 return SDValue();
16223
16224 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
16225 Mask <<= ShiftedAmount;
16226 // Check that X has indeed the right shape (only the Y-th bit can be set in
16227 // every byte).
16228 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
16229 return SDValue();
16230
16231 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
16232}
16233
16234 static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
16235 const RISCVSubtarget &Subtarget) {
16236 if (SDValue V = combineSubOfBoolean(N, DAG))
16237 return V;
16238
16239 EVT VT = N->getValueType(0);
16240 SDValue N0 = N->getOperand(0);
16241 SDValue N1 = N->getOperand(1);
16242 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
16243 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
16244 isNullConstant(N1.getOperand(1)) &&
16245 N1.getValueType() == N1.getOperand(0).getValueType()) {
16246 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
16247 if (CCVal == ISD::SETLT) {
16248 SDLoc DL(N);
16249 unsigned ShAmt = N0.getValueSizeInBits() - 1;
16250 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
16251 DAG.getConstant(ShAmt, DL, VT));
16252 }
16253 }
16254
16255 if (SDValue V = combineBinOpOfZExt(N, DAG))
16256 return V;
16257 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
16258 return V;
16259
16260 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
16261 // (select lhs, rhs, cc, x, (sub x, y))
16262 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
16263}
16264
16265// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
16266// Legalizing setcc can introduce xors like this. Doing this transform reduces
16267// the number of xors and may allow the xor to fold into a branch condition.
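// For example, with X and Y known to be 0/1:
//   (and (xor X, 1), (xor Y, 1)) -> (xor (or X, Y), 1)
//   (or  (xor X, 1), (xor Y, 1)) -> (xor (and X, Y), 1)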
16268 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
16269 SDValue N0 = N->getOperand(0);
16270 SDValue N1 = N->getOperand(1);
16271 bool IsAnd = N->getOpcode() == ISD::AND;
16272
16273 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
16274 return SDValue();
16275
16276 if (!N0.hasOneUse() || !N1.hasOneUse())
16277 return SDValue();
16278
16279 SDValue N01 = N0.getOperand(1);
16280 SDValue N11 = N1.getOperand(1);
16281
16282 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
16283 // (xor X, -1) based on the upper bits of the other operand being 0. If the
16284 // operation is And, allow one of the Xors to use -1.
16285 if (isOneConstant(N01)) {
16286 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
16287 return SDValue();
16288 } else if (isOneConstant(N11)) {
16289 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
16290 if (!(IsAnd && isAllOnesConstant(N01)))
16291 return SDValue();
16292 } else
16293 return SDValue();
16294
16295 EVT VT = N->getValueType(0);
16296
16297 SDValue N00 = N0.getOperand(0);
16298 SDValue N10 = N1.getOperand(0);
16299
16300 // The LHS of the xors needs to be 0/1.
16301 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
16302 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
16303 return SDValue();
16304
16305 // Invert the opcode and insert a new xor.
16306 SDLoc DL(N);
16307 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16308 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
16309 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
16310}
16311
16312// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
16313// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
16314// value to an unsigned value. This will be lowered to vmax and series of
16315 // vnclipu instructions later. This can be extended to truncated types other
16316 // than i8 by replacing 256 and 255 with the equivalent constants for the
16317// type.
16318 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
16319 EVT VT = N->getValueType(0);
16320 SDValue N0 = N->getOperand(0);
16321 EVT SrcVT = N0.getValueType();
16322
16323 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16324 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
16325 return SDValue();
16326
16327 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
16328 return SDValue();
16329
16330 SDValue Cond = N0.getOperand(0);
16331 SDValue True = N0.getOperand(1);
16332 SDValue False = N0.getOperand(2);
16333
16334 if (Cond.getOpcode() != ISD::SETCC)
16335 return SDValue();
16336
16337 // FIXME: Support the version of this pattern with the select operands
16338 // swapped.
16339 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16340 if (CCVal != ISD::SETULT)
16341 return SDValue();
16342
16343 SDValue CondLHS = Cond.getOperand(0);
16344 SDValue CondRHS = Cond.getOperand(1);
16345
16346 if (CondLHS != True)
16347 return SDValue();
16348
16349 unsigned ScalarBits = VT.getScalarSizeInBits();
16350
16351 // FIXME: Support other constants.
16352 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
16353 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
16354 return SDValue();
16355
16356 if (False.getOpcode() != ISD::SIGN_EXTEND)
16357 return SDValue();
16358
16359 False = False.getOperand(0);
16360
16361 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
16362 return SDValue();
16363
16364 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
16365 if (!FalseRHSC || !FalseRHSC->isZero())
16366 return SDValue();
16367
16368 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
16369 if (CCVal2 != ISD::SETGT)
16370 return SDValue();
16371
16372 // Emit the signed to unsigned saturation pattern.
16373 SDLoc DL(N);
16374 SDValue Max =
16375 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
16376 SDValue Min =
16377 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
16378 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
16379 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
16380}
16381
16382// Handle P extension averaging subtraction pattern:
16383// (vXiY (trunc (srl (sub ([s|z]ext vXiY:$a), ([s|z]ext vXiY:$b)), 1)))
16384// -> PASUB/PASUBU
16385 static SDValue combinePExtTruncate(SDNode *N, SelectionDAG &DAG,
16386 const RISCVSubtarget &Subtarget) {
16387 SDValue N0 = N->getOperand(0);
16388 EVT VT = N->getValueType(0);
16389 if (N0.getOpcode() != ISD::SRL)
16390 return SDValue();
16391
16392 MVT VecVT = VT.getSimpleVT();
16393 if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
16394 VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
16395 return SDValue();
16396
16397 // Check if shift amount is 1
16398 SDValue ShAmt = N0.getOperand(1);
16399 if (ShAmt.getOpcode() != ISD::BUILD_VECTOR)
16400 return SDValue();
16401
16402 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(ShAmt);
16403 if (!BV)
16404 return SDValue();
16405 SDValue Splat = BV->getSplatValue();
16406 if (!Splat)
16407 return SDValue();
16408 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat);
16409 if (!C)
16410 return SDValue();
16411 if (C->getZExtValue() != 1)
16412 return SDValue();
16413
16414 // Check for SUB operation
16415 SDValue Sub = N0.getOperand(0);
16416 if (Sub.getOpcode() != ISD::SUB)
16417 return SDValue();
16418
16419 SDValue LHS = Sub.getOperand(0);
16420 SDValue RHS = Sub.getOperand(1);
16421
16422 // Check if both operands are sign/zero extends from the target
16423 // type
16424 bool IsSignExt = LHS.getOpcode() == ISD::SIGN_EXTEND &&
16425 RHS.getOpcode() == ISD::SIGN_EXTEND;
16426 bool IsZeroExt = LHS.getOpcode() == ISD::ZERO_EXTEND &&
16427 RHS.getOpcode() == ISD::ZERO_EXTEND;
16428
16429 if (!IsSignExt && !IsZeroExt)
16430 return SDValue();
16431
16432 SDValue A = LHS.getOperand(0);
16433 SDValue B = RHS.getOperand(0);
16434
16435 // Check if the extends are from our target vector type
16436 if (A.getValueType() != VT || B.getValueType() != VT)
16437 return SDValue();
16438
16439 // Determine the instruction based on type and signedness
16440 unsigned Opc;
16441 if (IsSignExt)
16442 Opc = RISCVISD::PASUB;
16443 else if (IsZeroExt)
16444 Opc = RISCVISD::PASUBU;
16445 else
16446 return SDValue();
16447
16448 // Create the target-specific node directly.
16449 return DAG.getNode(Opc, SDLoc(N), VT, {A, B});
16450}
16451
16452 static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
16453 const RISCVSubtarget &Subtarget) {
16454 SDValue N0 = N->getOperand(0);
16455 EVT VT = N->getValueType(0);
16456
16457 if (VT.isFixedLengthVector() && Subtarget.enablePExtCodeGen())
16458 return combinePExtTruncate(N, DAG, Subtarget);
16459
16460 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
16461 // extending X. This is safe since we only need the LSB after the shift and
16462 // shift amounts larger than 31 would produce poison. If we wait until
16463 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16464 // to use a BEXT instruction.
16465 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
16466 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
16467 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16468 SDLoc DL(N0);
16469 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16470 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16471 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16472 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
16473 }
16474
16475 return combineTruncSelectToSMaxUSat(N, DAG);
16476}
16477
16478// InstCombinerImpl::transformZExtICmp will narrow a zext of an icmp with a
16479// truncation. But RVV doesn't have truncation instructions for more than twice
16480// the bitwidth.
16481//
16482// E.g. trunc <vscale x 1 x i64> %x to <vscale x 1 x i8> will generate:
16483//
16484// vsetvli a0, zero, e32, m2, ta, ma
16485// vnsrl.wi v12, v8, 0
16486// vsetvli zero, zero, e16, m1, ta, ma
16487// vnsrl.wi v8, v12, 0
16488// vsetvli zero, zero, e8, mf2, ta, ma
16489// vnsrl.wi v8, v8, 0
16490//
16491// So reverse the combine so we generate an vmseq/vmsne again:
16492//
16493// and (lshr (trunc X), ShAmt), 1
16494// -->
16495// zext (icmp ne (and X, (1 << ShAmt)), 0)
16496//
16497// and (lshr (not (trunc X)), ShAmt), 1
16498// -->
16499// zext (icmp eq (and X, (1 << ShAmt)), 0)
16500 static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG,
16501 const RISCVSubtarget &Subtarget) {
16502 using namespace SDPatternMatch;
16503 SDLoc DL(N);
16504
16505 if (!Subtarget.hasVInstructions())
16506 return SDValue();
16507
16508 EVT VT = N->getValueType(0);
16509 if (!VT.isVector())
16510 return SDValue();
16511
16512 APInt ShAmt;
16513 SDValue Inner;
16514 if (!sd_match(N, m_And(m_OneUse(m_Srl(m_Value(Inner), m_ConstInt(ShAmt))),
16515 m_One())))
16516 return SDValue();
16517
16518 SDValue X;
16519 bool IsNot;
16520 if (sd_match(Inner, m_Not(m_Trunc(m_Value(X)))))
16521 IsNot = true;
16522 else if (sd_match(Inner, m_Trunc(m_Value(X))))
16523 IsNot = false;
16524 else
16525 return SDValue();
16526
16527 EVT WideVT = X.getValueType();
16528 if (VT.getScalarSizeInBits() >= WideVT.getScalarSizeInBits() / 2)
16529 return SDValue();
16530
16531 SDValue Res =
16532 DAG.getNode(ISD::AND, DL, WideVT, X,
16533 DAG.getConstant(1ULL << ShAmt.getZExtValue(), DL, WideVT));
16534 Res = DAG.getSetCC(DL,
16535 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
16536 WideVT.getVectorElementCount()),
16537 Res, DAG.getConstant(0, DL, WideVT),
16538 IsNot ? ISD::SETEQ : ISD::SETNE);
16539 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
16540}
16541
16542// (and (i1) f, (setcc c, 0, ne)) -> (czero.nez f, c)
16543// (and (i1) f, (setcc c, 0, eq)) -> (czero.eqz f, c)
16544// (and (setcc c, 0, ne), (i1) g) -> (czero.nez g, c)
16545// (and (setcc c, 0, eq), (i1) g) -> (czero.eqz g, c)
16546 static SDValue combineANDOfSETCCToCZERO(SDNode *N, SelectionDAG &DAG,
16547 const RISCVSubtarget &Subtarget) {
16548 if (!Subtarget.hasCZEROLike())
16549 return SDValue();
16550
16551 SDValue N0 = N->getOperand(0);
16552 SDValue N1 = N->getOperand(1);
16553
16554 auto IsEqualCompZero = [](SDValue &V) -> bool {
16555 if (V.getOpcode() == ISD::SETCC && isNullConstant(V.getOperand(1))) {
16556 ISD::CondCode CC = cast<CondCodeSDNode>(V.getOperand(2))->get();
16557 if (CC == ISD::SETEQ || CC == ISD::SETNE)
16558 return true;
16559 }
16560 return false;
16561 };
16562
16563 if (!IsEqualCompZero(N0) || !N0.hasOneUse())
16564 std::swap(N0, N1);
16565 if (!IsEqualCompZero(N0) || !N0.hasOneUse())
16566 return SDValue();
16567
16568 KnownBits Known = DAG.computeKnownBits(N1);
16569 if (Known.getMaxValue().ugt(1))
16570 return SDValue();
16571
16572 unsigned CzeroOpcode =
16573 (cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETNE)
16574 ? RISCVISD::CZERO_EQZ
16575 : RISCVISD::CZERO_NEZ;
16576
16577 EVT VT = N->getValueType(0);
16578 SDLoc DL(N);
16579 return DAG.getNode(CzeroOpcode, DL, VT, N1, N0.getOperand(0));
16580}
16581
16582 static SDValue reduceANDOfAtomicLoad(SDNode *N,
16583 TargetLowering::DAGCombinerInfo &DCI) {
16584 SelectionDAG &DAG = DCI.DAG;
16585 if (N->getOpcode() != ISD::AND)
16586 return SDValue();
16587
16588 SDValue N0 = N->getOperand(0);
16589 if (N0.getOpcode() != ISD::ATOMIC_LOAD)
16590 return SDValue();
16591 if (!N0.hasOneUse())
16592 return SDValue();
16593
16594 AtomicSDNode *ALoad = cast<AtomicSDNode>(N0.getNode());
16595 if (ALoad->getExtensionType() != ISD::NON_EXTLOAD)
16596 return SDValue();
16597
16598 EVT LoadedVT = ALoad->getMemoryVT();
16599 ConstantSDNode *MaskConst = dyn_cast<ConstantSDNode>(N->getOperand(1));
16600 if (!MaskConst)
16601 return SDValue();
16602 uint64_t Mask = MaskConst->getZExtValue();
16603 uint64_t ExpectedMask = maskTrailingOnes<uint64_t>(LoadedVT.getSizeInBits());
16604 if (Mask != ExpectedMask)
16605 return SDValue();
16606
16607 SDValue ZextLoad = DAG.getAtomicLoad(
16608 ISD::ZEXTLOAD, SDLoc(N), ALoad->getMemoryVT(), N->getValueType(0),
16609 ALoad->getChain(), ALoad->getBasePtr(), ALoad->getMemOperand());
16610 DCI.CombineTo(N, ZextLoad);
16611 DAG.ReplaceAllUsesOfValueWith(SDValue(N0.getNode(), 1), ZextLoad.getValue(1));
16612 DCI.recursivelyDeleteUnusedNodes(N0.getNode());
16613 return SDValue(N, 0);
16614}
16615
16616 // Combines two comparison operations and a logic operation into one selection
16617 // operation (min, max) and a logic operation. Returns the newly constructed node
16618 // if the conditions for the optimization are satisfied.
16619 static SDValue performANDCombine(SDNode *N,
16620 TargetLowering::DAGCombinerInfo &DCI,
16621 const RISCVSubtarget &Subtarget) {
16622 SelectionDAG &DAG = DCI.DAG;
16623
16624 SDValue N0 = N->getOperand(0);
16625 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
16626 // extending X. This is safe since we only need the LSB after the shift and
16627 // shift amounts larger than 31 would produce poison. If we wait until
16628 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
16629 // to use a BEXT instruction.
16630 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16631 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
16632 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
16633 N0.hasOneUse()) {
16634 SDLoc DL(N);
16635 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16636 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16637 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
16638 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
16639 DAG.getConstant(1, DL, MVT::i64));
16640 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
16641 }
16642
16643 if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
16644 return V;
16645 if (DCI.isAfterLegalizeDAG())
16646 if (SDValue V = combineANDOfSETCCToCZERO(N, DAG, Subtarget))
16647 return V;
16648 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16649 return V;
16650 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16651 return V;
16652 if (SDValue V = reduceANDOfAtomicLoad(N, DCI))
16653 return V;
16654
16655 if (DCI.isAfterLegalizeDAG())
16656 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16657 return V;
16658
16659 // fold (and (select lhs, rhs, cc, -1, y), x) ->
16660 // (select lhs, rhs, cc, x, (and x, y))
16661 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
16662}
16663
16664// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
16665// FIXME: Generalize to other binary operators with same operand.
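// For example:
//   (or (czero_eqz (xor a, 1), c), (czero_nez (xor b, 1), c))
//   -> (xor (or (czero_eqz a, c), (czero_nez b, c)), 1)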
16666 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
16667 SelectionDAG &DAG) {
16668 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
16669
16670 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
16671 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
16672 !N0.hasOneUse() || !N1.hasOneUse())
16673 return SDValue();
16674
16675 // Should have the same condition.
16676 SDValue Cond = N0.getOperand(1);
16677 if (Cond != N1.getOperand(1))
16678 return SDValue();
16679
16680 SDValue TrueV = N0.getOperand(0);
16681 SDValue FalseV = N1.getOperand(0);
16682
16683 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
16684 TrueV.getOperand(1) != FalseV.getOperand(1) ||
16685 !isOneConstant(TrueV.getOperand(1)) ||
16686 !TrueV.hasOneUse() || !FalseV.hasOneUse())
16687 return SDValue();
16688
16689 EVT VT = N->getValueType(0);
16690 SDLoc DL(N);
16691
16692 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
16693 Cond);
16694 SDValue NewN1 =
16695 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
16696 SDValue NewOr =
16697 DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
16698 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
16699}
16700
16701// (xor X, (xor (and X, C2), Y))
16702// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
16703// where C2 is a shifted mask with width = Width and shift = ShAmt
16704// qc_insb might become qc.insb or qc.insbi depending on the operands.
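// For example, with C2 = 0xff0 (Width = 8, ShAmt = 4) and Y having no bits set
// outside 0xff0:
//   (xor X, (xor (and X, 0xff0), Y)) -> (qc_insb X, (sra Y, 4), 8, 4)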
16705 static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16706 const RISCVSubtarget &Subtarget) {
16707 if (!Subtarget.hasVendorXqcibm())
16708 return SDValue();
16709
16710 using namespace SDPatternMatch;
16711 SDValue Base, Inserted;
16712 APInt CMask;
16713 if (!sd_match(N, m_Xor(m_Value(Base),
16714 m_OneUse(m_Xor(m_OneUse(m_And(m_Deferred(Base),
16715 m_ConstInt(CMask))),
16716 m_Value(Inserted))))))
16717 return SDValue();
16718
16719 if (N->getValueType(0) != MVT::i32)
16720 return SDValue();
16721 unsigned Width, ShAmt;
16722 if (!CMask.isShiftedMask(ShAmt, Width))
16723 return SDValue();
16724
16725 // Check if all zero bits in CMask are also zero in Inserted
16726 if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
16727 return SDValue();
16728
16729 SDLoc DL(N);
16730
16731 // `Inserted` needs to be right shifted before it is put into the
16732 // instruction.
16733 Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
16734 DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
16735
16736 SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
16737 DAG.getConstant(ShAmt, DL, MVT::i32)};
16738 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16739}
16740
16741 static SDValue combineOrToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16742 const RISCVSubtarget &Subtarget) {
16743 if (!Subtarget.hasVendorXqcibm())
16744 return SDValue();
16745
16746 using namespace SDPatternMatch;
16747
16748 SDValue X;
16749 APInt MaskImm;
16750 if (!sd_match(N, m_Or(m_OneUse(m_Value(X)), m_ConstInt(MaskImm))))
16751 return SDValue();
16752
16753 unsigned ShAmt, Width;
16754 if (!MaskImm.isShiftedMask(ShAmt, Width) || MaskImm.isSignedIntN(12))
16755 return SDValue();
16756
16757 if (N->getValueType(0) != MVT::i32)
16758 return SDValue();
16759
16760 // If Zbs is enabled and only a single bit is set, we can use BSETI, which
16761 // can be compressed to C_BSETI when Xqcibm is enabled.
16762 if (Width == 1 && Subtarget.hasStdExtZbs())
16763 return SDValue();
16764
16765 // If C1 is a shifted mask (but can't be formed as an ORI),
16766 // use a bitfield insert of -1.
16767 // Transform (or x, C1)
16768 // -> (qc.insbi x, -1, width, shift)
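// For example, (or x, 0xff00) -> (qc.insbi x, -1, 8, 8), since 0xff00 is a
// shifted mask that cannot be encoded as a 12-bit signed immediate.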
16769 SDLoc DL(N);
16770
16771 SDValue Ops[] = {X, DAG.getSignedConstant(-1, DL, MVT::i32),
16772 DAG.getConstant(Width, DL, MVT::i32),
16773 DAG.getConstant(ShAmt, DL, MVT::i32)};
16774 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16775}
16776
16777// Generate a QC_INSB/QC_INSBI from 'or (and X, MaskImm), OrImm' iff the value
16778// being inserted only sets known zero bits.
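// For example, (or (and X, 0xffff00ff), 0x4200) -> (qc_insb X, 0x42, 8, 8):
// the AND clears bits 15:8 and the OR only sets bits within that cleared field.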
16779 static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
16780 const RISCVSubtarget &Subtarget) {
16781 // Supported only in Xqcibm for now.
16782 if (!Subtarget.hasVendorXqcibm())
16783 return SDValue();
16784
16785 using namespace SDPatternMatch;
16786
16787 SDValue Inserted;
16788 APInt MaskImm, OrImm;
16789 if (!sd_match(
16790 N, m_SpecificVT(MVT::i32, m_Or(m_OneUse(m_And(m_Value(Inserted),
16791 m_ConstInt(MaskImm))),
16792 m_ConstInt(OrImm)))))
16793 return SDValue();
16794
16795 // Compute the Known Zero for the AND as this allows us to catch more general
16796 // cases than just looking for AND with imm.
16797 KnownBits Known = DAG.computeKnownBits(N->getOperand(0));
16798
16799 // The bits being inserted must only set those bits that are known to be
16800 // zero.
16801 if (!OrImm.isSubsetOf(Known.Zero)) {
16802 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
16803 // currently handle this case.
16804 return SDValue();
16805 }
16806
16807 unsigned ShAmt, Width;
16808 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
16809 if (!Known.Zero.isShiftedMask(ShAmt, Width))
16810 return SDValue();
16811
16812 // QC_INSB(I) dst, src, #width, #shamt.
16813 SDLoc DL(N);
16814
16815 SDValue ImmNode =
16816 DAG.getSignedConstant(OrImm.getSExtValue() >> ShAmt, DL, MVT::i32);
16817
16818 SDValue Ops[] = {Inserted, ImmNode, DAG.getConstant(Width, DL, MVT::i32),
16819 DAG.getConstant(ShAmt, DL, MVT::i32)};
16820 return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
16821}
16822
16823 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16824 const RISCVSubtarget &Subtarget) {
16825 SelectionDAG &DAG = DCI.DAG;
16826
16827 if (SDValue V = combineOrToBitfieldInsert(N, DAG, Subtarget))
16828 return V;
16829 if (SDValue V = combineOrAndToBitfieldInsert(N, DAG, Subtarget))
16830 return V;
16831 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16832 return V;
16833 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16834 return V;
16835
16836 if (DCI.isAfterLegalizeDAG())
16837 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
16838 return V;
16839
16840 // Look for Or of CZERO_EQZ/NEZ with same condition which is the select idiom.
16841 // We may be able to pull a common operation out of the true and false value.
16842 SDValue N0 = N->getOperand(0);
16843 SDValue N1 = N->getOperand(1);
16844 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
16845 return V;
16846 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
16847 return V;
16848
16849 // fold (or (select cond, 0, y), x) ->
16850 // (select cond, x, (or x, y))
16851 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16852}
16853
16854 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
16855 const RISCVSubtarget &Subtarget) {
16856 SDValue N0 = N->getOperand(0);
16857 SDValue N1 = N->getOperand(1);
16858
16859 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
16860 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
16861 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
16862 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
16863 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
16864 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
16865 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
16866 SDLoc DL(N);
16867 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
16868 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
16869 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
16870 SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
16871 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
16872 }
16873
16874 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
16875 // NOTE: Assumes ROL being legal means ROLW is legal.
16876 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16877 if (N0.getOpcode() == RISCVISD::SLLW &&
16878 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
16879 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
16880 SDLoc DL(N);
16881 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
16882 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
16883 }
16884
16885 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
16886 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
16887 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
16888 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
16889 if (ConstN00 && CC == ISD::SETLT) {
16890 EVT VT = N0.getValueType();
16891 SDLoc DL(N0);
16892 const APInt &Imm = ConstN00->getAPIntValue();
16893 if ((Imm + 1).isSignedIntN(12))
16894 return DAG.getSetCC(DL, VT, N0.getOperand(1),
16895 DAG.getConstant(Imm + 1, DL, VT), CC);
16896 }
16897 }
16898
16899 if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
16900 return V;
16901
16902 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
16903 return V;
16904 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
16905 return V;
16906
16907 // fold (xor (select cond, 0, y), x) ->
16908 // (select cond, x, (xor x, y))
16909 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
16910}
16911
16912// Try to expand a multiply to a sequence of shifts and add/subs,
16913// for a machine without native mul instruction.
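// For example, MulAmt = 7 has the non-adjacent form 8 - 1, so the expansion
// computes (X << 3) - X.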
16914 static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG,
16915 uint64_t MulAmt) {
16916 SDLoc DL(N);
16917 EVT VT = N->getValueType(0);
16918 const uint64_t BitWidth = VT.getFixedSizeInBits();
16919
16920 SDValue Result = DAG.getConstant(0, DL, N->getValueType(0));
16921 SDValue N0 = N->getOperand(0);
16922
16923 // Find the Non-adjacent form of the multiplier.
16924 for (uint64_t E = MulAmt, I = 0; E && I < BitWidth; ++I, E >>= 1) {
16925 if (E & 1) {
16926 bool IsAdd = (E & 3) == 1;
16927 E -= IsAdd ? 1 : -1;
16928 SDValue ShiftVal = DAG.getNode(ISD::SHL, DL, VT, N0,
16929 DAG.getShiftAmountConstant(I, VT, DL));
16930 ISD::NodeType AddSubOp = IsAdd ? ISD::ADD : ISD::SUB;
16931 Result = DAG.getNode(AddSubOp, DL, VT, Result, ShiftVal);
16932 }
16933 }
16934
16935 return Result;
16936}
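// Worked example for expandMulToNAFSequence above (illustrative): for
// MulAmt == 11 (0b1011), the loop accumulates -(X << 0) at bit 0,
// -(X << 2) at bit 2, and +(X << 4) at bit 4, i.e. 11 * X == 16X - 4X - X,
// trading a constant materialization plus a mul for shifts and add/subs.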
16937
16938// X * (2^N +/- 2^M) -> (add/sub (shl X, C1), (shl X, C2))
16939static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG,
16940 uint64_t MulAmt) {
16941 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
16942 SDValue X = N->getOperand(0);
16943 ISD::NodeType Op;
16944 uint64_t ShiftAmt1;
16945 bool CanSub = isPowerOf2_64(MulAmt + MulAmtLowBit);
16946 auto PreferSub = [X, MulAmtLowBit]() {
16947 // For MulAmt == 3 << M, both (X << (M + 2)) - (X << M)
16948 // and (X << (M + 1)) + (X << M) are valid expansions.
16949 // Prefer SUB if the (X << (M + 2)) term comes for free
16950 // because X is an exact (Y >> (M + 2)), so the shifts cancel.
16951 uint64_t ShAmt = Log2_64(MulAmtLowBit) + 2;
16952 using namespace SDPatternMatch;
16953 return sd_match(X, m_ExactSr(m_Value(), m_SpecificInt(ShAmt)));
16954 };
16955 if (isPowerOf2_64(MulAmt - MulAmtLowBit) && !(CanSub && PreferSub())) {
16956 Op = ISD::ADD;
16957 ShiftAmt1 = MulAmt - MulAmtLowBit;
16958 } else if (CanSub) {
16959 Op = ISD::SUB;
16960 ShiftAmt1 = MulAmt + MulAmtLowBit;
16961 } else {
16962 return SDValue();
16963 }
16964 EVT VT = N->getValueType(0);
16965 SDLoc DL(N);
16966 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, X,
16967 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
16968 SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, X,
16969 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
16970 return DAG.getNode(Op, DL, VT, Shift1, Shift2);
16971}
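// Example for expandMulToAddOrSubOfShl above: MulAmt == 24 has
// MulAmtLowBit == 8; since 24 - 8 == 16 is a power of two, the ADD form
// (X << 4) + (X << 3) is emitted. The SUB form (X << 5) - (X << 3) is only
// preferred when X is known to be an exact shift right by 5, so the wider
// left shift cancels with it.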
16972
16973static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX,
16974 unsigned ShY, bool AddX, unsigned Shift) {
16975 SDLoc DL(N);
16976 EVT VT = N->getValueType(0);
16977 SDValue X = N->getOperand(0);
16978 // Put the shift first if we can fold:
16979 // a. a zext into the shift forming a slli.uw
16980 // b. an exact shift right forming one shorter shift or no shift at all
16981 using namespace SDPatternMatch;
16982 if (Shift != 0 &&
16983 sd_match(X, m_AnyOf(m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))),
16984 m_ExactSr(m_Value(), m_ConstInt())))) {
16985 X = DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(Shift, DL, VT));
16986 Shift = 0;
16987 }
16988 SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
16989 DAG.getTargetConstant(ShY, DL, VT), X);
16990 if (ShX != 0)
16991 ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, ShlAdd,
16992 DAG.getTargetConstant(ShX, DL, VT), AddX ? X : ShlAdd);
16993 if (Shift == 0)
16994 return ShlAdd;
16995 // Otherwise, put the shl last so that it can fold with following instructions
16996 // (e.g. sext or add).
16997 return DAG.getNode(ISD::SHL, DL, VT, ShlAdd, DAG.getConstant(Shift, DL, VT));
16998}
16999
17000static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG,
17001 uint64_t MulAmt, unsigned Shift) {
17002 switch (MulAmt) {
17003 // 3/5/9 -> (shYadd X, X)
17004 case 3:
17005 return getShlAddShlAdd(N, DAG, 0, 1, /*AddX=*/false, Shift);
17006 case 5:
17007 return getShlAddShlAdd(N, DAG, 0, 2, /*AddX=*/false, Shift);
17008 case 9:
17009 return getShlAddShlAdd(N, DAG, 0, 3, /*AddX=*/false, Shift);
17010 // 3/5/9 * 3/5/9 -> (shXadd (shYadd X, X), (shYadd X, X))
17011 case 5 * 3:
17012 return getShlAddShlAdd(N, DAG, 2, 1, /*AddX=*/false, Shift);
17013 case 9 * 3:
17014 return getShlAddShlAdd(N, DAG, 3, 1, /*AddX=*/false, Shift);
17015 case 5 * 5:
17016 return getShlAddShlAdd(N, DAG, 2, 2, /*AddX=*/false, Shift);
17017 case 9 * 5:
17018 return getShlAddShlAdd(N, DAG, 3, 2, /*AddX=*/false, Shift);
17019 case 9 * 9:
17020 return getShlAddShlAdd(N, DAG, 3, 3, /*AddX=*/false, Shift);
17021 default:
17022 break;
17023 }
17024
17025 int ShX;
17026 if (int ShY = isShifted359(MulAmt - 1, ShX)) {
17027 assert(ShX != 0 && "MulAmt=4,6,10 handled before");
17028 // 2/4/8 * 3/5/9 + 1 -> (shXadd (shYadd X, X), X)
17029 if (ShX <= 3)
17030 return getShlAddShlAdd(N, DAG, ShX, ShY, /*AddX=*/true, Shift);
17031 // 2^N * 3/5/9 + 1 -> (add (shYadd (shl X, N), (shl X, N)), X)
17032 if (Shift == 0) {
17033 SDLoc DL(N);
17034 EVT VT = N->getValueType(0);
17035 SDValue X = N->getOperand(0);
17036 SDValue Shl =
17037 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShX, DL, VT));
17038 SDValue ShlAdd = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
17039 DAG.getTargetConstant(ShY, DL, VT), Shl);
17040 return DAG.getNode(ISD::ADD, DL, VT, ShlAdd, X);
17041 }
17042 }
17043 return SDValue();
17044}
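// Illustrative examples for expandMulToShlAddShlAdd above (using Zba's
// shXadd for RISCVISD::SHL_ADD):
//   X * 25 -> sh2add (sh2add X, X), (sh2add X, X)              (5 * 5)
//   X * 50 -> slli (sh2add (sh2add X, X), (sh2add X, X)), 1    (25 * 2)
//   X * 11 -> sh1add (sh2add X, X), X                          (2 * 5 + 1)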
17045
17046// Try to expand a scalar multiply to a faster sequence.
17047static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
17048 TargetLowering::DAGCombinerInfo &DCI,
17049 const RISCVSubtarget &Subtarget) {
17050
17051 EVT VT = N->getValueType(0);
17052
17053 // LI + MUL is usually smaller than the alternative sequence.
17054 if (DAG.getMachineFunction().getFunction().hasMinSize())
17055 return SDValue();
17056
17057 if (VT != Subtarget.getXLenVT())
17058 return SDValue();
17059
17060 bool ShouldExpandMul =
17061 (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer()) ||
17062 !Subtarget.hasStdExtZmmul();
17063 if (!ShouldExpandMul)
17064 return SDValue();
17065
17066 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17067 if (!CNode)
17068 return SDValue();
17069 uint64_t MulAmt = CNode->getZExtValue();
17070
17071 // Don't do this if the Xqciac extension is enabled and MulAmt fits in a simm12.
17072 if (Subtarget.hasVendorXqciac() && isInt<12>(CNode->getSExtValue()))
17073 return SDValue();
17074
17075 // WARNING: The code below is knowingly incorrect with regards to undef
17076 // semantics. We're adding additional uses of X here, and in principle, we
17077 // should be freezing X before doing so. However, adding freeze here causes
17078 // real regressions, and no other target properly freezes X in these cases
17079 // either.
17080 if (Subtarget.hasShlAdd(3)) {
17081 // 3/5/9 * 2^N -> (shl (shXadd X, X), N)
17082 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
17083 // of 25 which happen to be quite common.
17084 // (2/4/8 * 3/5/9 + 1) * 2^N
17085 unsigned Shift = llvm::countr_zero(MulAmt);
17086 if (SDValue V = expandMulToShlAddShlAdd(N, DAG, MulAmt >> Shift, Shift))
17087 return V;
17088
17089 // If this is 2^N + 2/4/8, we can use a shift followed by a single
17090 // shXadd. First check that MulAmt is a sum of two powers of 2 (the easy
17091 // part), then use the trailing zero count of each term as the shift amounts.
17092 SDValue X = N->getOperand(0);
17093 if (Shift >= 1 && Shift <= 3 && isPowerOf2_64(MulAmt & (MulAmt - 1))) {
17094 unsigned ShiftAmt = llvm::countr_zero((MulAmt & (MulAmt - 1)));
17095 SDLoc DL(N);
17096 SDValue Shift1 =
17097 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
17098 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17099 DAG.getTargetConstant(Shift, DL, VT), Shift1);
17100 }
17101
17102 // TODO: 2^(C1>3) * 3/5/9 - 1
17103
17104 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
17105 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
17106 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
17107 if (ScaleShift >= 1 && ScaleShift < 4) {
17108 unsigned ShiftAmt = llvm::countr_zero((MulAmt - 1) & (MulAmt - 2));
17109 SDLoc DL(N);
17110 SDValue Shift1 =
17111 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
17112 return DAG.getNode(
17113 ISD::ADD, DL, VT, Shift1,
17114 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17115 DAG.getTargetConstant(ScaleShift, DL, VT), X));
17116 }
17117 }
17118
17119 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
17120 for (uint64_t Offset : {3, 5, 9}) {
17121 if (isPowerOf2_64(MulAmt + Offset)) {
17122 unsigned ShAmt = llvm::countr_zero(MulAmt + Offset);
17123 if (ShAmt >= VT.getSizeInBits())
17124 continue;
17125 SDLoc DL(N);
17126 SDValue Shift1 =
17127 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
17128 SDValue Mul359 =
17129 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
17130 DAG.getTargetConstant(Log2_64(Offset - 1), DL, VT), X);
17131 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
17132 }
17133 }
17134 }
17135
17136 if (SDValue V = expandMulToAddOrSubOfShl(N, DAG, MulAmt))
17137 return V;
17138
17139 if (!Subtarget.hasStdExtZmmul())
17140 return expandMulToNAFSequence(N, DAG, MulAmt);
17141
17142 return SDValue();
17143}
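// Example for the "2^N - 3/5/9" case in expandMul above (when shXadd is
// available): MulAmt == 59 has 59 + 5 == 64, so X * 59 is emitted as
// (sub (shl X, 6), (sh2add X, X)), i.e. 64X - 5X, avoiding both a constant
// materialization and a mul.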
17144
17145// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
17146// (bitcast (sra (v2Xi16 (bitcast X)), 15))
17147// Same for other equivalent types with other equivalent constants.
17148static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG) {
17149 EVT VT = N->getValueType(0);
17150 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17151
17152 // Do this for legal vectors unless they are i1 or i8 vectors.
17153 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
17154 return SDValue();
17155
17156 if (N->getOperand(0).getOpcode() != ISD::AND ||
17157 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
17158 return SDValue();
17159
17160 SDValue And = N->getOperand(0);
17161 SDValue Srl = And.getOperand(0);
17162
17163 APInt V1, V2, V3;
17164 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
17165 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
17166 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
17167 return SDValue();
17168
17169 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
17170 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
17171 V3 != (HalfSize - 1))
17172 return SDValue();
17173
17174 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
17175 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
17176 VT.getVectorElementCount() * 2);
17177 SDLoc DL(N);
17178 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
17179 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
17180 DAG.getConstant(HalfSize - 1, DL, HalfVT));
17181 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
17182}
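// Rationale for the combine above, illustrated with v4i32 and HalfSize == 16:
// in each i32 lane, (srl X, 15) places the sign bit of the low i16 half at
// bit 0 and the sign bit of the high i16 half at bit 16; the AND with
// 0x00010001 keeps just those two bits, and the multiply by 0xffff expands
// each of them to an all-ones i16 without the two products overlapping. The
// lane therefore equals each i16 half arithmetically shifted right by 15.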
17183
17184static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
17185 TargetLowering::DAGCombinerInfo &DCI,
17186 const RISCVSubtarget &Subtarget) {
17187 EVT VT = N->getValueType(0);
17188 if (!VT.isVector())
17189 return expandMul(N, DAG, DCI, Subtarget);
17190
17191 SDLoc DL(N);
17192 SDValue N0 = N->getOperand(0);
17193 SDValue N1 = N->getOperand(1);
17194 SDValue MulOper;
17195 unsigned AddSubOpc;
17196
17197 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
17198 // (mul x, add (y, 1)) -> (add x, (mul x, y))
17199 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
17200 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
17201 auto IsAddSubWith1 = [&](SDValue V) -> bool {
17202 AddSubOpc = V->getOpcode();
17203 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
17204 SDValue Opnd = V->getOperand(1);
17205 MulOper = V->getOperand(0);
17206 if (AddSubOpc == ISD::SUB)
17207 std::swap(Opnd, MulOper);
17208 if (isOneOrOneSplat(Opnd))
17209 return true;
17210 }
17211 return false;
17212 };
17213
17214 if (IsAddSubWith1(N0)) {
17215 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
17216 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
17217 }
17218
17219 if (IsAddSubWith1(N1)) {
17220 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
17221 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
17222 }
17223
17224 if (SDValue V = combineBinOpOfZExt(N, DAG))
17225 return V;
17226
17227 if (SDValue V = combineVectorMulToSraBitcast(N, DAG))
17228 return V;
17229
17230 return SDValue();
17231}
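// Example for the IsAddSubWith1 folds above: (mul (add x, splat 1), y) is
// rewritten to (add (mul x, y), y), which the later VL patterns can select
// as a single vmadd.vv; similarly (mul (sub splat 1, x), y) becomes
// (sub y, (mul x, y)) and maps to vnmsub.vv.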
17232
17233/// Since indexed load/store instructions zero-extend their indices, try to
17234/// narrow the type of the index operand.
17235static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
17236 if (isIndexTypeSigned(IndexType))
17237 return false;
17238
17239 if (!N->hasOneUse())
17240 return false;
17241
17242 EVT VT = N.getValueType();
17243 SDLoc DL(N);
17244
17245 // In general, what we're doing here is seeing if we can sink a truncate to
17246 // a smaller element type into the expression tree building our index.
17247 // TODO: We can generalize this and handle a bunch more cases if useful.
17248
17249 // Narrow a buildvector to the narrowest element type. This requires less
17250 // work and less register pressure at high LMUL, and creates smaller constants
17251 // which may be cheaper to materialize.
17252 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
17253 KnownBits Known = DAG.computeKnownBits(N);
17254 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
17255 LLVMContext &C = *DAG.getContext();
17256 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
17257 if (ResultVT.bitsLT(VT.getVectorElementType())) {
17258 N = DAG.getNode(ISD::TRUNCATE, DL,
17259 VT.changeVectorElementType(ResultVT), N);
17260 return true;
17261 }
17262 }
17263
17264 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
17265 if (N.getOpcode() != ISD::SHL)
17266 return false;
17267
17268 SDValue N0 = N.getOperand(0);
17269 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
17270 N0.getOpcode() != RISCVISD::VZEXT_VL)
17271 return false;
17272 if (!N0->hasOneUse())
17273 return false;
17274
17275 APInt ShAmt;
17276 SDValue N1 = N.getOperand(1);
17277 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
17278 return false;
17279
17280 SDValue Src = N0.getOperand(0);
17281 EVT SrcVT = Src.getValueType();
17282 unsigned SrcElen = SrcVT.getScalarSizeInBits();
17283 unsigned ShAmtV = ShAmt.getZExtValue();
17284 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
17285 NewElen = std::max(NewElen, 8U);
17286
17287 // Skip if NewElen is not narrower than the original extended type.
17288 if (NewElen >= N0.getValueType().getScalarSizeInBits())
17289 return false;
17290
17291 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
17292 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
17293
17294 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
17295 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
17296 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
17297 return true;
17298}
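// Illustrative examples for narrowIndex above: a constant index
// build_vector <i64 0, i64 8, i64 16, i64 24> only needs 5 active bits, so
// it is truncated to a v4i8 <0, 8, 16, 24>. Similarly,
// (shl (zext v4i8 X to v4i64), 2) is narrowed to
// (shl (zext v4i8 X to v4i16), 2) because 8 + 2 bits fit in an i16 element.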
17299
17300/// Try to map an integer comparison with size > XLEN to vector instructions
17301/// before type legalization splits it up into chunks.
17302static SDValue
17303combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
17304 const SDLoc &DL, SelectionDAG &DAG,
17305 const RISCVSubtarget &Subtarget) {
17306 assert(ISD::isIntEqualitySetCC(CC) && "Bad comparison predicate");
17307
17308 if (!Subtarget.hasVInstructions())
17309 return SDValue();
17310
17311 MVT XLenVT = Subtarget.getXLenVT();
17312 EVT OpVT = X.getValueType();
17313 // We're looking for an oversized integer equality comparison.
17314 if (!OpVT.isScalarInteger())
17315 return SDValue();
17316
17317 unsigned OpSize = OpVT.getSizeInBits();
17318 // The size should be larger than XLen and smaller than the maximum vector
17319 // size.
17320 if (OpSize <= Subtarget.getXLen() ||
17321 OpSize > Subtarget.getRealMinVLen() *
17322 Subtarget.getMaxLMULForFixedLengthVectors())
17323 return SDValue();
17324
17325 // Don't perform this combine if constructing the vector will be expensive.
17326 auto IsVectorBitCastCheap = [](SDValue X) {
17327 X = peekThroughBitcasts(X);
17328 return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
17329 X.getOpcode() == ISD::LOAD;
17330 };
17331 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
17332 return SDValue();
17333
17334 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
17335 Attribute::NoImplicitFloat))
17336 return SDValue();
17337
17338 // Bail out for non-byte-sized types.
17339 if (!OpVT.isByteSized())
17340 return SDValue();
17341
17342 unsigned VecSize = OpSize / 8;
17343 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, VecSize);
17344 EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VecSize);
17345
17346 SDValue VecX = DAG.getBitcast(VecVT, X);
17347 SDValue VecY = DAG.getBitcast(VecVT, Y);
17348 SDValue Mask = DAG.getAllOnesConstant(DL, CmpVT);
17349 SDValue VL = DAG.getConstant(VecSize, DL, XLenVT);
17350
17351 SDValue Cmp = DAG.getNode(ISD::VP_SETCC, DL, CmpVT, VecX, VecY,
17352 DAG.getCondCode(ISD::SETNE), Mask, VL);
17353 return DAG.getSetCC(DL, VT,
17354 DAG.getNode(ISD::VP_REDUCE_OR, DL, XLenVT,
17355 DAG.getConstant(0, DL, XLenVT), Cmp, Mask,
17356 VL),
17357 DAG.getConstant(0, DL, XLenVT), CC);
17358}
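// Example for combineVectorSizedSetCCEquality above (RV64 with V): an i128
// equality compare of two loaded values becomes a v16i8 bitcast of each
// side, a VP_SETCC with SETNE producing a v16i1 "bytes differ" mask, a
// VP_REDUCE_OR of that mask, and a final scalar setcc of the reduction
// against zero with the original equality predicate.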
17359
17360static SDValue performSETCCCombine(SDNode *N,
17361 TargetLowering::DAGCombinerInfo &DCI,
17362 const RISCVSubtarget &Subtarget) {
17363 SelectionDAG &DAG = DCI.DAG;
17364 SDLoc dl(N);
17365 SDValue N0 = N->getOperand(0);
17366 SDValue N1 = N->getOperand(1);
17367 EVT VT = N->getValueType(0);
17368 EVT OpVT = N0.getValueType();
17369
17370 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
17371 // Looking for an equality compare.
17372 if (!isIntEqualitySetCC(Cond))
17373 return SDValue();
17374
17375 if (SDValue V =
17376 combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
17377 return V;
17378
17379 if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
17380 N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
17381 isa<ConstantSDNode>(N0.getOperand(1))) {
17382 const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
17383 // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
17384 if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
17385 AndRHSC.isNegatedPowerOf2()) {
17386 unsigned ShiftBits = AndRHSC.countr_zero();
17387 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
17388 DAG.getConstant(ShiftBits, dl, OpVT));
17389 return DAG.getSetCC(dl, VT, Shift, N1, Cond);
17390 }
17391
17392 // Similar to above but handling the lower 32 bits by using sraiw. Allow
17393 // comparing with constants other than 0 if the constant can be folded into
17394 // addi or xori after shifting.
17395 uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
17396 uint64_t AndRHSInt = AndRHSC.getZExtValue();
17397 if (OpVT == MVT::i64 && isUInt<32>(AndRHSInt) &&
17398 isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
17399 unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
17400 int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
17401 if (NewC >= -2048 && NewC <= 2048) {
17402 SDValue SExt =
17403 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
17404 DAG.getValueType(MVT::i32));
17405 SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
17406 DAG.getConstant(ShiftBits, dl, OpVT));
17407 return DAG.getSetCC(dl, VT, Shift,
17408 DAG.getSignedConstant(NewC, dl, OpVT), Cond);
17409 }
17410 }
17411 }
17412
17413 // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
17414 // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
17415 // bit 31. Same for setne. C1' may be cheaper to materialize and the
17416 // sext_inreg can become a sext.w instead of a shift pair.
17417 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
17418 return SDValue();
17419
17420 // RHS needs to be a constant.
17421 auto *N1C = dyn_cast<ConstantSDNode>(N1);
17422 if (!N1C)
17423 return SDValue();
17424
17425 // LHS needs to be (and X, 0xffffffff).
17426 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
17427 !isa<ConstantSDNode>(N0.getOperand(1)) ||
17428 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
17429 return SDValue();
17430
17431 // Don't do this if the sign bit is provably zero; it would be turned back
17432 // into an AND.
17433 APInt SignMask = APInt::getOneBitSet(64, 31);
17434 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
17435 return SDValue();
17436
17437 const APInt &C1 = N1C->getAPIntValue();
17438
17439 // If the constant is larger than 2^32 - 1 it is impossible for both sides
17440 // to be equal.
17441 if (C1.getActiveBits() > 32)
17442 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
17443
17444 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT,
17445 N0.getOperand(0), DAG.getValueType(MVT::i32));
17446 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
17447 dl, OpVT), Cond);
17448}
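// Example for the final fold in performSETCCCombine above: on RV64,
// (seteq (and X, 0xffffffff), 0x80000000) becomes
// (seteq (sext_inreg X, i32), 0xffffffff80000000). The sext_inreg selects to
// a single sext.w, and here the sign-extended constant is a single lui,
// while the zero-extending AND would otherwise need a shift pair.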
17449
17450static SDValue
17451performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
17452 const RISCVSubtarget &Subtarget) {
17453 SelectionDAG &DAG = DCI.DAG;
17454 SDValue Src = N->getOperand(0);
17455 EVT VT = N->getValueType(0);
17456 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
17457 unsigned Opc = Src.getOpcode();
17458 SDLoc DL(N);
17459
17460 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
17461 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
17462 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
17463 Subtarget.hasStdExtZfhmin())
17464 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, DL, VT, Src.getOperand(0));
17465
17466 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
17467 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
17468 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
17469 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
17470 return DAG.getNode(RISCVISD::SLLW, DL, VT, Src.getOperand(0),
17471 Src.getOperand(1));
17472
17473 // Fold (sext_inreg (setcc), i1) -> (sub 0, (setcc))
17474 if (Opc == ISD::SETCC && SrcVT == MVT::i1 && DCI.isAfterLegalizeDAG())
17475 return DAG.getNegative(Src, DL, VT);
17476
17477 // Fold (sext_inreg (xor (setcc), -1), i1) -> (add (setcc), -1)
17478 if (Opc == ISD::XOR && SrcVT == MVT::i1 &&
17479 isAllOnesConstant(Src.getOperand(1)) &&
17480 Src.getOperand(0).getOpcode() == ISD::SETCC && DCI.isAfterLegalizeDAG())
17481 return DAG.getNode(ISD::ADD, DL, VT, Src.getOperand(0),
17482 DAG.getAllOnesConstant(DL, VT));
17483
17484 return SDValue();
17485}
17486
17487namespace {
17488// Forward declaration of the structure holding the necessary information to
17489// apply a combine.
17490struct CombineResult;
17491
17492enum ExtKind : uint8_t {
17493 ZExt = 1 << 0,
17494 SExt = 1 << 1,
17495 FPExt = 1 << 2,
17496 BF16Ext = 1 << 3
17497};
17498/// Helper class for folding sign/zero extensions.
17499/// In particular, this class is used for the following combines:
17500/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
17501/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
17502/// mul | mul_vl -> vwmul(u) | vwmul_su
17503/// shl | shl_vl -> vwsll
17504/// fadd -> vfwadd | vfwadd_w
17505/// fsub -> vfwsub | vfwsub_w
17506/// fmul -> vfwmul
17507/// An object of this class represents an operand of the operation we want to
17508/// combine.
17509/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
17510/// NodeExtensionHelper for `a` and one for `b`.
17511///
17512/// This class abstracts away how the extension is materialized and
17513/// how its number of users affect the combines.
17514///
17515/// In particular:
17516/// - VWADD_W is conceptually == add(op0, sext(op1))
17517/// - VWADDU_W == add(op0, zext(op1))
17518/// - VWSUB_W == sub(op0, sext(op1))
17519/// - VWSUBU_W == sub(op0, zext(op1))
17520/// - VFWADD_W == fadd(op0, fpext(op1))
17521/// - VFWSUB_W == fsub(op0, fpext(op1))
17522/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
17523/// zext|sext(smaller_value).
17524struct NodeExtensionHelper {
17525 /// Records if this operand behaves like a zero-extended value.
17526 bool SupportsZExt;
17527 /// Records if this operand behaves like a sign-extended value.
17528 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
17529 /// instance, a splat constant (e.g., 3) would support being both sign and
17530 /// zero extended.
17531 bool SupportsSExt;
17532 /// Records if this operand behaves like a floating-point-extended value.
17533 bool SupportsFPExt;
17534 /// Records if this operand is extended from bf16.
17535 bool SupportsBF16Ext;
17536 /// This boolean captures whether we care if this operand would still be
17537 /// around after the folding happens.
17538 bool EnforceOneUse;
17539 /// Original value that this NodeExtensionHelper represents.
17540 SDValue OrigOperand;
17541
17542 /// Get the value feeding the extension or the value itself.
17543 /// E.g., for zext(a), this would return a.
17544 SDValue getSource() const {
17545 switch (OrigOperand.getOpcode()) {
17546 case ISD::ZERO_EXTEND:
17547 case ISD::SIGN_EXTEND:
17548 case RISCVISD::VSEXT_VL:
17549 case RISCVISD::VZEXT_VL:
17550 case RISCVISD::FP_EXTEND_VL:
17551 return OrigOperand.getOperand(0);
17552 default:
17553 return OrigOperand;
17554 }
17555 }
17556
17557 /// Check if this instance represents a splat.
17558 bool isSplat() const {
17559 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
17560 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
17561 }
17562
17563 /// Get the extended opcode.
17564 unsigned getExtOpc(ExtKind SupportsExt) const {
17565 switch (SupportsExt) {
17566 case ExtKind::SExt:
17567 return RISCVISD::VSEXT_VL;
17568 case ExtKind::ZExt:
17569 return RISCVISD::VZEXT_VL;
17570 case ExtKind::FPExt:
17571 case ExtKind::BF16Ext:
17572 return RISCVISD::FP_EXTEND_VL;
17573 }
17574 llvm_unreachable("Unknown ExtKind enum");
17575 }
17576
17577 /// Get or create a value that can feed \p Root with the given extension \p
17578 /// SupportsExt. If \p SupportsExt is std::nullopt, this returns the source of this
17579 /// operand. \see ::getSource().
17580 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
17581 const RISCVSubtarget &Subtarget,
17582 std::optional<ExtKind> SupportsExt) const {
17583 if (!SupportsExt.has_value())
17584 return OrigOperand;
17585
17586 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
17587
17588 SDValue Source = getSource();
17589 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
17590 if (Source.getValueType() == NarrowVT)
17591 return Source;
17592
17593 unsigned ExtOpc = getExtOpc(*SupportsExt);
17594
17595 // If we need an extension, we should be changing the type.
17596 SDLoc DL(OrigOperand);
17597 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
17598 switch (OrigOperand.getOpcode()) {
17599 case ISD::ZERO_EXTEND:
17600 case ISD::SIGN_EXTEND:
17601 case RISCVISD::VSEXT_VL:
17602 case RISCVISD::VZEXT_VL:
17603 case RISCVISD::FP_EXTEND_VL:
17604 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
17605 case ISD::SPLAT_VECTOR:
17606 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
17607 case RISCVISD::VMV_V_X_VL:
17608 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
17609 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
17610 case RISCVISD::VFMV_V_F_VL:
17611 Source = Source.getOperand(1);
17612 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
17613 Source = Source.getOperand(0);
17614 assert(Source.getValueType() == NarrowVT.getVectorElementType());
17615 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
17616 DAG.getUNDEF(NarrowVT), Source, VL);
17617 default:
17618 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
17619 // and that operand should already have the right NarrowVT so no
17620 // extension should be required at this point.
17621 llvm_unreachable("Unsupported opcode");
17622 }
17623 }
17624
17625 /// Helper function to get the narrow type for \p Root.
17626 /// The narrow type is the type of \p Root where we divided the size of each
17627 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
17628 /// \pre Both the narrow type and the original type should be legal.
17629 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
17630 MVT VT = Root->getSimpleValueType(0);
17631
17632 // Determine the narrow size.
17633 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17634
17635 MVT EltVT = SupportsExt == ExtKind::BF16Ext ? MVT::bf16
17636 : SupportsExt == ExtKind::FPExt
17637 ? MVT::getFloatingPointVT(NarrowSize)
17638 : MVT::getIntegerVT(NarrowSize);
17639
17640 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
17641 "Trying to extend something we can't represent");
17642 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
17643 return NarrowVT;
17644 }
17645
17646 /// Get the opcode to materialize:
17647 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
17648 static unsigned getSExtOpcode(unsigned Opcode) {
17649 switch (Opcode) {
17650 case ISD::ADD:
17651 case RISCVISD::ADD_VL:
17652 case RISCVISD::VWADD_W_VL:
17653 case RISCVISD::VWADDU_W_VL:
17654 case ISD::OR:
17655 case RISCVISD::OR_VL:
17656 return RISCVISD::VWADD_VL;
17657 case ISD::SUB:
17658 case RISCVISD::SUB_VL:
17659 case RISCVISD::VWSUB_W_VL:
17660 case RISCVISD::VWSUBU_W_VL:
17661 return RISCVISD::VWSUB_VL;
17662 case ISD::MUL:
17663 case RISCVISD::MUL_VL:
17664 return RISCVISD::VWMUL_VL;
17665 default:
17666 llvm_unreachable("Unexpected opcode");
17667 }
17668 }
17669
17670 /// Get the opcode to materialize:
17671 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
17672 static unsigned getZExtOpcode(unsigned Opcode) {
17673 switch (Opcode) {
17674 case ISD::ADD:
17675 case RISCVISD::ADD_VL:
17676 case RISCVISD::VWADD_W_VL:
17677 case RISCVISD::VWADDU_W_VL:
17678 case ISD::OR:
17679 case RISCVISD::OR_VL:
17680 return RISCVISD::VWADDU_VL;
17681 case ISD::SUB:
17682 case RISCVISD::SUB_VL:
17683 case RISCVISD::VWSUB_W_VL:
17684 case RISCVISD::VWSUBU_W_VL:
17685 return RISCVISD::VWSUBU_VL;
17686 case ISD::MUL:
17687 case RISCVISD::MUL_VL:
17688 return RISCVISD::VWMULU_VL;
17689 case ISD::SHL:
17690 case RISCVISD::SHL_VL:
17691 return RISCVISD::VWSLL_VL;
17692 default:
17693 llvm_unreachable("Unexpected opcode");
17694 }
17695 }
17696
17697 /// Get the opcode to materialize:
17698 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
17699 static unsigned getFPExtOpcode(unsigned Opcode) {
17700 switch (Opcode) {
17701 case RISCVISD::FADD_VL:
17702 case RISCVISD::VFWADD_W_VL:
17703 return RISCVISD::VFWADD_VL;
17704 case RISCVISD::FSUB_VL:
17705 case RISCVISD::VFWSUB_W_VL:
17706 return RISCVISD::VFWSUB_VL;
17707 case RISCVISD::FMUL_VL:
17708 return RISCVISD::VFWMUL_VL;
17709 case RISCVISD::VFMADD_VL:
17710 return RISCVISD::VFWMADD_VL;
17711 case RISCVISD::VFMSUB_VL:
17712 return RISCVISD::VFWMSUB_VL;
17713 case RISCVISD::VFNMADD_VL:
17714 return RISCVISD::VFWNMADD_VL;
17715 case RISCVISD::VFNMSUB_VL:
17716 return RISCVISD::VFWNMSUB_VL;
17717 default:
17718 llvm_unreachable("Unexpected opcode");
17719 }
17720 }
17721
17722 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
17723 /// newOpcode(a, b).
17724 static unsigned getSUOpcode(unsigned Opcode) {
17725 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
17726 "SU is only supported for MUL");
17727 return RISCVISD::VWMULSU_VL;
17728 }
17729
17730 /// Get the opcode to materialize
17731 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
17732 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
17733 switch (Opcode) {
17734 case ISD::ADD:
17735 case RISCVISD::ADD_VL:
17736 case ISD::OR:
17737 case RISCVISD::OR_VL:
17738 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
17739 : RISCVISD::VWADDU_W_VL;
17740 case ISD::SUB:
17741 case RISCVISD::SUB_VL:
17742 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
17743 : RISCVISD::VWSUBU_W_VL;
17744 case RISCVISD::FADD_VL:
17745 return RISCVISD::VFWADD_W_VL;
17746 case RISCVISD::FSUB_VL:
17747 return RISCVISD::VFWSUB_W_VL;
17748 default:
17749 llvm_unreachable("Unexpected opcode");
17750 }
17751 }
17752
17753 using CombineToTry = std::function<std::optional<CombineResult>(
17754 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
17755 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
17756 const RISCVSubtarget &)>;
17757
17758 /// Check if this node needs to be fully folded or extended for all users.
17759 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
17760
17761 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
17762 const RISCVSubtarget &Subtarget) {
17763 unsigned Opc = OrigOperand.getOpcode();
17764 MVT VT = OrigOperand.getSimpleValueType();
17765
17766 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
17767 "Unexpected Opcode");
17768
17769 // The passthru must be undef for tail agnostic.
17770 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
17771 return;
17772
17773 // Get the scalar value.
17774 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
17775 : OrigOperand.getOperand(1);
17776
17777 // See if we have enough sign bits or zero bits in the scalar to use a
17778 // widening opcode by splatting to smaller element size.
17779 unsigned EltBits = VT.getScalarSizeInBits();
17780 unsigned ScalarBits = Op.getValueSizeInBits();
17781 // If we're not getting all bits from the element, we need special handling.
17782 if (ScalarBits < EltBits) {
17783 // This should only occur on RV32.
17784 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
17785 !Subtarget.is64Bit() && "Unexpected splat");
17786 // vmv.v.x sign extends narrow inputs.
17787 SupportsSExt = true;
17788
17789 // If the input is positive, then sign extend is also zero extend.
17790 if (DAG.SignBitIsZero(Op))
17791 SupportsZExt = true;
17792
17793 EnforceOneUse = false;
17794 return;
17795 }
17796
17797 unsigned NarrowSize = EltBits / 2;
17798 // If the narrow type cannot be expressed with a legal VMV,
17799 // this is not a valid candidate.
17800 if (NarrowSize < 8)
17801 return;
17802
17803 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
17804 SupportsSExt = true;
17805
17806 if (DAG.MaskedValueIsZero(Op,
17807 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
17808 SupportsZExt = true;
17809
17810 EnforceOneUse = false;
17811 }
17812
17813 bool isSupportedFPExtend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17814 return (NarrowEltVT == MVT::f32 ||
17815 (NarrowEltVT == MVT::f16 && Subtarget.hasVInstructionsF16()));
17816 }
17817
17818 bool isSupportedBF16Extend(MVT NarrowEltVT, const RISCVSubtarget &Subtarget) {
17819 return NarrowEltVT == MVT::bf16 && Subtarget.hasStdExtZvfbfwma();
17820 }
17821
17822 /// Helper method to set the various fields of this struct based on the
17823 /// type of \p Root.
17824 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
17825 const RISCVSubtarget &Subtarget) {
17826 SupportsZExt = false;
17827 SupportsSExt = false;
17828 SupportsFPExt = false;
17829 SupportsBF16Ext = false;
17830 EnforceOneUse = true;
17831 unsigned Opc = OrigOperand.getOpcode();
17832 // For the nodes we handle below, we end up using their inputs directly: see
17833 // getSource(). However, since they either don't have a passthru or we check
17834 // that their passthru is undef, we can safely ignore their mask and VL.
17835 switch (Opc) {
17836 case ISD::ZERO_EXTEND:
17837 case ISD::SIGN_EXTEND: {
17838 MVT VT = OrigOperand.getSimpleValueType();
17839 if (!VT.isVector())
17840 break;
17841
17842 SDValue NarrowElt = OrigOperand.getOperand(0);
17843 MVT NarrowVT = NarrowElt.getSimpleValueType();
17844 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
17845 if (NarrowVT.getVectorElementType() == MVT::i1)
17846 break;
17847
17848 SupportsZExt = Opc == ISD::ZERO_EXTEND;
17849 SupportsSExt = Opc == ISD::SIGN_EXTEND;
17850 break;
17851 }
17852 case RISCVISD::VZEXT_VL:
17853 SupportsZExt = true;
17854 break;
17855 case RISCVISD::VSEXT_VL:
17856 SupportsSExt = true;
17857 break;
17858 case RISCVISD::FP_EXTEND_VL: {
17859 MVT NarrowEltVT =
17860 OrigOperand.getOperand(0).getSimpleValueType().getVectorElementType();
17861 if (isSupportedFPExtend(NarrowEltVT, Subtarget))
17862 SupportsFPExt = true;
17863 if (isSupportedBF16Extend(NarrowEltVT, Subtarget))
17864 SupportsBF16Ext = true;
17865
17866 break;
17867 }
17868 case ISD::SPLAT_VECTOR:
17869 case RISCVISD::VMV_V_X_VL:
17870 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
17871 break;
17872 case RISCVISD::VFMV_V_F_VL: {
17873 MVT VT = OrigOperand.getSimpleValueType();
17874
17875 if (!OrigOperand.getOperand(0).isUndef())
17876 break;
17877
17878 SDValue Op = OrigOperand.getOperand(1);
17879 if (Op.getOpcode() != ISD::FP_EXTEND)
17880 break;
17881
17882 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
17883 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
17884 if (NarrowSize != ScalarBits)
17885 break;
17886
17887 if (isSupportedFPExtend(Op.getOperand(0).getSimpleValueType(), Subtarget))
17888 SupportsFPExt = true;
17889 if (isSupportedBF16Extend(Op.getOperand(0).getSimpleValueType(),
17890 Subtarget))
17891 SupportsBF16Ext = true;
17892 break;
17893 }
17894 default:
17895 break;
17896 }
17897 }
17898
17899 /// Check if \p Root supports any extension folding combines.
17900 static bool isSupportedRoot(const SDNode *Root,
17901 const RISCVSubtarget &Subtarget) {
17902 switch (Root->getOpcode()) {
17903 case ISD::ADD:
17904 case ISD::SUB:
17905 case ISD::MUL: {
17906 return Root->getValueType(0).isScalableVector();
17907 }
17908 case ISD::OR: {
17909 return Root->getValueType(0).isScalableVector() &&
17910 Root->getFlags().hasDisjoint();
17911 }
17912 // Vector Widening Integer Add/Sub/Mul Instructions
17913 case RISCVISD::ADD_VL:
17914 case RISCVISD::MUL_VL:
17915 case RISCVISD::VWADD_W_VL:
17916 case RISCVISD::VWADDU_W_VL:
17917 case RISCVISD::SUB_VL:
17918 case RISCVISD::VWSUB_W_VL:
17919 case RISCVISD::VWSUBU_W_VL:
17920 // Vector Widening Floating-Point Add/Sub/Mul Instructions
17921 case RISCVISD::FADD_VL:
17922 case RISCVISD::FSUB_VL:
17923 case RISCVISD::FMUL_VL:
17924 case RISCVISD::VFWADD_W_VL:
17925 case RISCVISD::VFWSUB_W_VL:
17926 return true;
17927 case RISCVISD::OR_VL:
17928 return Root->getFlags().hasDisjoint();
17929 case ISD::SHL:
17930 return Root->getValueType(0).isScalableVector() &&
17931 Subtarget.hasStdExtZvbb();
17932 case RISCVISD::SHL_VL:
17933 return Subtarget.hasStdExtZvbb();
17934 case RISCVISD::VFMADD_VL:
17935 case RISCVISD::VFNMSUB_VL:
17936 case RISCVISD::VFNMADD_VL:
17937 case RISCVISD::VFMSUB_VL:
17938 return true;
17939 default:
17940 return false;
17941 }
17942 }
17943
17944 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
17945 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
17946 const RISCVSubtarget &Subtarget) {
17947 assert(isSupportedRoot(Root, Subtarget) &&
17948 "Trying to build an helper with an "
17949 "unsupported root");
17950 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
17952 OrigOperand = Root->getOperand(OperandIdx);
17953
17954 unsigned Opc = Root->getOpcode();
17955 switch (Opc) {
17956 // We consider
17957 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
17958 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
17959 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
17960 case RISCVISD::VWADD_W_VL:
17961 case RISCVISD::VWADDU_W_VL:
17962 case RISCVISD::VWSUB_W_VL:
17963 case RISCVISD::VWSUBU_W_VL:
17964 case RISCVISD::VFWADD_W_VL:
17965 case RISCVISD::VFWSUB_W_VL:
17966 // Operand 1 can't be changed.
17967 if (OperandIdx == 1)
17968 break;
17969 [[fallthrough]];
17970 default:
17971 fillUpExtensionSupport(Root, DAG, Subtarget);
17972 break;
17973 }
17974 }
17975
17976 /// Helper function to get the Mask and VL from \p Root.
17977 static std::pair<SDValue, SDValue>
17978 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
17979 const RISCVSubtarget &Subtarget) {
17980 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
17981 switch (Root->getOpcode()) {
17982 case ISD::ADD:
17983 case ISD::SUB:
17984 case ISD::MUL:
17985 case ISD::OR:
17986 case ISD::SHL: {
17987 SDLoc DL(Root);
17988 MVT VT = Root->getSimpleValueType(0);
17989 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
17990 }
17991 default:
17992 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
17993 }
17994 }
17995
17996 /// Helper function to check if \p N is commutative with respect to the
17997 /// foldings that are supported by this class.
17998 static bool isCommutative(const SDNode *N) {
17999 switch (N->getOpcode()) {
18000 case ISD::ADD:
18001 case ISD::MUL:
18002 case ISD::OR:
18003 case RISCVISD::ADD_VL:
18004 case RISCVISD::MUL_VL:
18005 case RISCVISD::OR_VL:
18006 case RISCVISD::FADD_VL:
18007 case RISCVISD::FMUL_VL:
18008 case RISCVISD::VFMADD_VL:
18009 case RISCVISD::VFNMSUB_VL:
18010 case RISCVISD::VFNMADD_VL:
18011 case RISCVISD::VFMSUB_VL:
18012 return true;
18013 case RISCVISD::VWADD_W_VL:
18014 case RISCVISD::VWADDU_W_VL:
18015 case ISD::SUB:
18016 case RISCVISD::SUB_VL:
18017 case RISCVISD::VWSUB_W_VL:
18018 case RISCVISD::VWSUBU_W_VL:
18019 case RISCVISD::VFWADD_W_VL:
18020 case RISCVISD::FSUB_VL:
18021 case RISCVISD::VFWSUB_W_VL:
18022 case ISD::SHL:
18023 case RISCVISD::SHL_VL:
18024 return false;
18025 default:
18026 llvm_unreachable("Unexpected opcode");
18027 }
18028 }
18029
18030 /// Get a list of combine to try for folding extensions in \p Root.
18031 /// Note that each returned CombineToTry function doesn't actually modify
18032 /// anything. Instead they produce an optional CombineResult that if not None,
18033 /// need to be materialized for the combine to be applied.
18034 /// \see CombineResult::materialize.
18035 /// If the related CombineToTry function returns std::nullopt, that means the
18036 /// combine didn't match.
18037 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
18038};
18039
18040/// Helper structure that holds all the necessary information to materialize a
18041/// combine that does some extension folding.
18042struct CombineResult {
18043 /// Opcode to be generated when materializing the combine.
18044 unsigned TargetOpcode;
18045 // No value means no extension is needed.
18046 std::optional<ExtKind> LHSExt;
18047 std::optional<ExtKind> RHSExt;
18048 /// Root of the combine.
18049 SDNode *Root;
18050 /// LHS of the TargetOpcode.
18051 NodeExtensionHelper LHS;
18052 /// RHS of the TargetOpcode.
18053 NodeExtensionHelper RHS;
18054
18055 CombineResult(unsigned TargetOpcode, SDNode *Root,
18056 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
18057 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
18058 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
18059 LHS(LHS), RHS(RHS) {}
18060
18061 /// Return a value that uses TargetOpcode and that can be used to replace
18062 /// Root.
18063 /// The actual replacement is *not* done in that method.
18064 SDValue materialize(SelectionDAG &DAG,
18065 const RISCVSubtarget &Subtarget) const {
18066 SDValue Mask, VL, Passthru;
18067 std::tie(Mask, VL) =
18068 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
18069 switch (Root->getOpcode()) {
18070 default:
18071 Passthru = Root->getOperand(2);
18072 break;
18073 case ISD::ADD:
18074 case ISD::SUB:
18075 case ISD::MUL:
18076 case ISD::OR:
18077 case ISD::SHL:
18078 Passthru = DAG.getUNDEF(Root->getValueType(0));
18079 break;
18080 }
18081 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
18082 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
18083 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
18084 Passthru, Mask, VL);
18085 }
18086};
18087
18088/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
18089/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
18090/// are zext) and LHS and RHS can be folded into Root.
18091/// AllowExtMask defines which form `ext` can take in this pattern.
18092///
18093/// \note If the pattern can match with both zext and sext, the returned
18094/// CombineResult will feature the zext result.
18095///
18096/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18097/// can be used to apply the pattern.
18098static std::optional<CombineResult>
18099canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
18100 const NodeExtensionHelper &RHS,
18101 uint8_t AllowExtMask, SelectionDAG &DAG,
18102 const RISCVSubtarget &Subtarget) {
18103 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
18104 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
18105 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
18106 /*RHSExt=*/{ExtKind::ZExt});
18107 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
18108 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
18109 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
18110 /*RHSExt=*/{ExtKind::SExt});
18111 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
18112 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
18113 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
18114 /*RHSExt=*/{ExtKind::FPExt});
18115 if ((AllowExtMask & ExtKind::BF16Ext) && LHS.SupportsBF16Ext &&
18116 RHS.SupportsBF16Ext)
18117 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
18118 Root, LHS, /*LHSExt=*/{ExtKind::BF16Ext}, RHS,
18119 /*RHSExt=*/{ExtKind::BF16Ext});
18120 return std::nullopt;
18121}
18122
18123/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
18124/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
18125/// are zext) and LHS and RHS can be folded into Root.
18126///
18127/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18128/// can be used to apply the pattern.
18129static std::optional<CombineResult>
18130canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
18131 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18132 const RISCVSubtarget &Subtarget) {
18133 return canFoldToVWWithSameExtensionImpl(
18134 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
18135 Subtarget);
18136}
18137
18138/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
18139///
18140/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18141/// can be used to apply the pattern.
18142static std::optional<CombineResult>
18143canFoldToVWWithSameExtZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18144 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18145 const RISCVSubtarget &Subtarget) {
18146 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
18147 Subtarget);
18148}
18149
18150/// Check if \p Root follows a pattern Root(bf16ext(LHS), bf16ext(RHS))
18151///
18152/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18153/// can be used to apply the pattern.
18154static std::optional<CombineResult>
18155canFoldToVWWithSameExtBF16(SDNode *Root, const NodeExtensionHelper &LHS,
18156 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18157 const RISCVSubtarget &Subtarget) {
18158 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::BF16Ext, DAG,
18159 Subtarget);
18160}
18161
18162/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
18163///
18164/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18165/// can be used to apply the pattern.
18166static std::optional<CombineResult>
18167canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
18168 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18169 const RISCVSubtarget &Subtarget) {
18170 if (RHS.SupportsFPExt)
18171 return CombineResult(
18172 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
18173 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
18174
18175 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
18176 // sext/zext?
18177 // Control this behavior behind an option (AllowSplatInVW_W) for testing
18178 // purposes.
18179 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
18180 return CombineResult(
18181 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
18182 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
18183 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
18184 return CombineResult(
18185 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
18186 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
18187 return std::nullopt;
18188}
18189
18190/// Check if \p Root follows a pattern Root(sext(LHS), RHS)
18191///
18192/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18193/// can be used to apply the pattern.
18194static std::optional<CombineResult>
18195canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18196 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18197 const RISCVSubtarget &Subtarget) {
18198 if (LHS.SupportsSExt)
18199 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
18200 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
18201 /*RHSExt=*/std::nullopt);
18202 return std::nullopt;
18203}
18204
18205/// Check if \p Root follows a pattern Root(zext(LHS), RHS)
18206///
18207/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18208/// can be used to apply the pattern.
18209static std::optional<CombineResult>
18210canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18211 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18212 const RISCVSubtarget &Subtarget) {
18213 if (LHS.SupportsZExt)
18214 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
18215 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
18216 /*RHSExt=*/std::nullopt);
18217 return std::nullopt;
18218}
18219
18220/// Check if \p Root follows a pattern Root(fpext(LHS), RHS)
18221///
18222/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18223/// can be used to apply the pattern.
18224static std::optional<CombineResult>
18225canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
18226 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18227 const RISCVSubtarget &Subtarget) {
18228 if (LHS.SupportsFPExt)
18229 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
18230 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
18231 /*RHSExt=*/std::nullopt);
18232 return std::nullopt;
18233}
18234
18235/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
18236///
18237/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
18238/// can be used to apply the pattern.
18239static std::optional<CombineResult>
18240canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
18241 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
18242 const RISCVSubtarget &Subtarget) {
18243
18244 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
18245 return std::nullopt;
18246 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
18247 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
18248 /*RHSExt=*/{ExtKind::ZExt});
18249}
18250
18251SmallVector<NodeExtensionHelper::CombineToTry>
18252NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
18253 SmallVector<CombineToTry> Strategies;
18254 switch (Root->getOpcode()) {
18255 case ISD::ADD:
18256 case ISD::SUB:
18257 case ISD::OR:
18258 case RISCVISD::ADD_VL:
18259 case RISCVISD::SUB_VL:
18260 case RISCVISD::OR_VL:
18261 case RISCVISD::FADD_VL:
18262 case RISCVISD::FSUB_VL:
18263 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
18264 Strategies.push_back(canFoldToVWWithSameExtension);
18265 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w}|vfwadd_w|vfwsub_w
18266 Strategies.push_back(canFoldToVW_W);
18267 break;
18268 case RISCVISD::FMUL_VL:
18269 case RISCVISD::VFMADD_VL:
18270 case RISCVISD::VFMSUB_VL:
18271 case RISCVISD::VFNMADD_VL:
18272 case RISCVISD::VFNMSUB_VL:
18273 Strategies.push_back(canFoldToVWWithSameExtension);
18274 if (Root->getOpcode() == RISCVISD::VFMADD_VL)
18275 Strategies.push_back(canFoldToVWWithSameExtBF16);
18276 break;
18277 case ISD::MUL:
18278 case RISCVISD::MUL_VL:
18279 // mul -> vwmul(u)
18280 Strategies.push_back(canFoldToVWWithSameExtension);
18281 // mul -> vwmulsu
18282 Strategies.push_back(canFoldToVW_SU);
18283 break;
18284 case ISD::SHL:
18285 case RISCVISD::SHL_VL:
18286 // shl -> vwsll
18287 Strategies.push_back(canFoldToVWWithSameExtZEXT);
18288 break;
18289 case RISCVISD::VWADD_W_VL:
18290 case RISCVISD::VWSUB_W_VL:
18291 // vwadd_w|vwsub_w -> vwadd|vwsub
18292 Strategies.push_back(canFoldToVWWithSEXT);
18293 break;
18294 case RISCVISD::VWADDU_W_VL:
18295 case RISCVISD::VWSUBU_W_VL:
18296 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
18297 Strategies.push_back(canFoldToVWWithZEXT);
18298 break;
18299 case RISCVISD::VFWADD_W_VL:
18300 case RISCVISD::VFWSUB_W_VL:
18301 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
18302 Strategies.push_back(canFoldToVWWithFPEXT);
18303 break;
18304 default:
18305 llvm_unreachable("Unexpected opcode");
18306 }
18307 return Strategies;
18308}
18309} // End anonymous namespace.
18310
18312 // TODO: Extend this to other binops using generic identity logic
18313 assert(N->getOpcode() == RISCVISD::ADD_VL);
18314 SDValue A = N->getOperand(0);
18315 SDValue B = N->getOperand(1);
18316 SDValue Passthru = N->getOperand(2);
18317 if (!Passthru.isUndef())
18318 // TODO: This could be a vmerge instead.
18319 return SDValue();
18321 if (ISD::isConstantSplatVectorAllZeros(B.getNode()))
18322 return A;
18323 // Peek through fixed to scalable
18324 if (B.getOpcode() == ISD::INSERT_SUBVECTOR && B.getOperand(0).isUndef() &&
18325 ISD::isConstantSplatVectorAllZeros(B.getOperand(1).getNode()))
18326 return A;
18327 return SDValue();
18328}
18329
18330/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
18331/// The supported combines are:
18332/// add | add_vl | or disjoint | or_vl disjoint -> vwadd(u) | vwadd(u)_w
18333/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
18334/// mul | mul_vl -> vwmul(u) | vwmul_su
18335/// shl | shl_vl -> vwsll
18336/// fadd_vl -> vfwadd | vfwadd_w
18337/// fsub_vl -> vfwsub | vfwsub_w
18338/// fmul_vl -> vfwmul
18339/// vwadd_w(u) -> vwadd(u)
18340/// vwsub_w(u) -> vwsub(u)
18341/// vfwadd_w -> vfwadd
18342/// vfwsub_w -> vfwsub
18343static SDValue combineOp_VLToVWOp_VL(SDNode *N,
18344 TargetLowering::DAGCombinerInfo &DCI,
18345 const RISCVSubtarget &Subtarget) {
18346 SelectionDAG &DAG = DCI.DAG;
18347 if (DCI.isBeforeLegalize())
18348 return SDValue();
18349
18350 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
18351 return SDValue();
18352
18353 SmallVector<SDNode *> Worklist;
18354 SmallPtrSet<SDNode *, 8> Inserted;
18355 SmallPtrSet<SDNode *, 8> ExtensionsToRemove;
18356 Worklist.push_back(N);
18357 Inserted.insert(N);
18358 SmallVector<CombineResult> CombinesToApply;
18359
18360 while (!Worklist.empty()) {
18361 SDNode *Root = Worklist.pop_back_val();
18362
18363 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
18364 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
18365 auto AppendUsersIfNeeded =
18366 [&Worklist, &Subtarget, &Inserted,
18367 &ExtensionsToRemove](const NodeExtensionHelper &Op) {
18368 if (Op.needToPromoteOtherUsers()) {
18369 // Remember that we're supposed to remove this extension.
18370 ExtensionsToRemove.insert(Op.OrigOperand.getNode());
18371 for (SDUse &Use : Op.OrigOperand->uses()) {
18372 SDNode *TheUser = Use.getUser();
18373 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
18374 return false;
18375 // We only support the first 2 operands of FMA.
18376 if (Use.getOperandNo() >= 2)
18377 return false;
18378 if (Inserted.insert(TheUser).second)
18379 Worklist.push_back(TheUser);
18380 }
18381 }
18382 return true;
18383 };
18384
18385 // Control the compile time by limiting the number of nodes we look at in
18386 // total.
18387 if (Inserted.size() > ExtensionMaxWebSize)
18388 return SDValue();
18389
18390 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
18391 NodeExtensionHelper::getSupportedFoldings(Root);
18392
18393 assert(!FoldingStrategies.empty() && "Nothing to be folded");
18394 bool Matched = false;
18395 for (int Attempt = 0;
18396 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
18397 ++Attempt) {
18398
18399 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
18400 FoldingStrategies) {
18401 std::optional<CombineResult> Res =
18402 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
18403 if (Res) {
18404 // If this strategy wouldn't remove an extension we're supposed to
18405 // remove, reject it.
18406 if (!Res->LHSExt.has_value() &&
18407 ExtensionsToRemove.contains(LHS.OrigOperand.getNode()))
18408 continue;
18409 if (!Res->RHSExt.has_value() &&
18410 ExtensionsToRemove.contains(RHS.OrigOperand.getNode()))
18411 continue;
18412
18413 Matched = true;
18414 CombinesToApply.push_back(*Res);
18415 // All the inputs that are extended need to be folded, otherwise
18416 // we would be left with both the old input (since it may still be
18417 // used) and the new one.
18418 if (Res->LHSExt.has_value())
18419 if (!AppendUsersIfNeeded(LHS))
18420 return SDValue();
18421 if (Res->RHSExt.has_value())
18422 if (!AppendUsersIfNeeded(RHS))
18423 return SDValue();
18424 break;
18425 }
18426 }
18427 std::swap(LHS, RHS);
18428 }
18429 // Right now we do an all or nothing approach.
18430 if (!Matched)
18431 return SDValue();
18432 }
18433 // Store the value for the replacement of the input node separately.
18434 SDValue InputRootReplacement;
18435 // We do the RAUW after we materialize all the combines, because some replaced
18436 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
18437 // some of these nodes may appear in the NodeExtensionHelpers of some of the
18438 // yet-to-be-visited CombinesToApply roots.
18439 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
18440 ValuesToReplace.reserve(CombinesToApply.size());
18441 for (CombineResult Res : CombinesToApply) {
18442 SDValue NewValue = Res.materialize(DAG, Subtarget);
18443 if (!InputRootReplacement) {
18444 assert(Res.Root == N &&
18445 "First element is expected to be the current node");
18446 InputRootReplacement = NewValue;
18447 } else {
18448 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
18449 }
18450 }
18451 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
18452 DCI.CombineTo(OldNewValues.first.getNode(), OldNewValues.second);
18453 }
18454 return InputRootReplacement;
18455}
18456
18457// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
18458// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
18459// y will be the Passthru and cond will be the Mask.
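// Illustrative example (hypothetical operands): with a wide nxv2i64 y and a
// narrow nxv2i32 x,
//   (vwadd.wv y, (vmerge cond, x, 0))
// becomes a masked (vwadd.wv y, x) whose passthru is y and whose mask is
// cond, so lanes where cond is false simply keep the value of y (y + 0 == y).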
18460 static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
18461   unsigned Opc = N->getOpcode();
18462 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
18463 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
18464
18465 SDValue Y = N->getOperand(0);
18466 SDValue MergeOp = N->getOperand(1);
18467 unsigned MergeOpc = MergeOp.getOpcode();
18468
18469 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
18470 return SDValue();
18471
18472 SDValue X = MergeOp->getOperand(1);
18473
18474 if (!MergeOp.hasOneUse())
18475 return SDValue();
18476
18477 // Passthru should be undef
18478 SDValue Passthru = N->getOperand(2);
18479 if (!Passthru.isUndef())
18480 return SDValue();
18481
18482 // Mask should be all ones
18483 SDValue Mask = N->getOperand(3);
18484 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
18485 return SDValue();
18486
18487 // False value of MergeOp should be all zeros
18488 SDValue Z = MergeOp->getOperand(2);
18489
18490 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
18491 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
18492 Z = Z.getOperand(1);
18493
18494 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
18495 return SDValue();
18496
18497 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
18498 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
18499 N->getFlags());
18500}
18501
18502 static SDValue performVWADDSUBW_VLCombine(SDNode *N,
18503                                           TargetLowering::DAGCombinerInfo &DCI,
18504                                           const RISCVSubtarget &Subtarget) {
18505 [[maybe_unused]] unsigned Opc = N->getOpcode();
18506 assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
18507 Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
18508
18509 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18510 return V;
18511
18512 return combineVWADDSUBWSelect(N, DCI.DAG);
18513}
18514
18515// Helper function for performMemPairCombine.
18516// Try to combine the memory loads/stores LSNode1 and LSNode2
18517// into a single memory pair operation.
18518 static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
18519                                  LSBaseSDNode *LSNode2, SDValue BasePtr,
18520 uint64_t Imm) {
18521   SmallPtrSet<const SDNode *, 32> Visited;
18522   SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
18523
18524 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
18525 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
18526 return SDValue();
18527
18528   MachineFunction &MF = DAG.getMachineFunction();
18529   const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
18530
18531 // The new operation has twice the width.
18532 MVT XLenVT = Subtarget.getXLenVT();
18533 EVT MemVT = LSNode1->getMemoryVT();
18534 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
18535 MachineMemOperand *MMO = LSNode1->getMemOperand();
18536   MachineMemOperand *NewMMO = MF.getMachineMemOperand(
18537       MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
18538
18539 if (LSNode1->getOpcode() == ISD::LOAD) {
18540 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
18541 unsigned Opcode;
18542 if (MemVT == MVT::i32)
18543 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
18544 else
18545 Opcode = RISCVISD::TH_LDD;
18546
18547 SDValue Res = DAG.getMemIntrinsicNode(
18548 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
18549 {LSNode1->getChain(), BasePtr,
18550 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
18551 NewMemVT, NewMMO);
18552
18553 SDValue Node1 =
18554 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
18555 SDValue Node2 =
18556 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
18557
18558 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
18559 return Node1;
18560 } else {
18561 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
18562
18563 SDValue Res = DAG.getMemIntrinsicNode(
18564 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
18565 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
18566 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
18567 NewMemVT, NewMMO);
18568
18569 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
18570 return Res;
18571 }
18572}
18573
18574// Try to combine two adjacent loads/stores to a single pair instruction from
18575// the XTHeadMemPair vendor extension.
18576 static SDValue performMemPairCombine(SDNode *N,
18577                                      TargetLowering::DAGCombinerInfo &DCI) {
18578   SelectionDAG &DAG = DCI.DAG;
18579   MachineFunction &MF = DAG.getMachineFunction();
18580   const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
18581
18582 // Target does not support load/store pair.
18583 if (!Subtarget.hasVendorXTHeadMemPair())
18584 return SDValue();
18585
18586 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
18587 EVT MemVT = LSNode1->getMemoryVT();
18588 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
18589
18590 // No volatile, indexed or atomic loads/stores.
18591 if (!LSNode1->isSimple() || LSNode1->isIndexed())
18592 return SDValue();
18593
18594 // Function to get a base + constant representation from a memory value.
18595 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
18596 if (Ptr->getOpcode() == ISD::ADD)
18597 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
18598 return {Ptr->getOperand(0), C1->getZExtValue()};
18599 return {Ptr, 0};
18600 };
18601
18602 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
18603
18604 SDValue Chain = N->getOperand(0);
18605 for (SDUse &Use : Chain->uses()) {
18606 if (Use.getUser() != N && Use.getResNo() == 0 &&
18607 Use.getUser()->getOpcode() == N->getOpcode()) {
18608       LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
18609 
18610 // No volatile, indexed or atomic loads/stores.
18611 if (!LSNode2->isSimple() || LSNode2->isIndexed())
18612 continue;
18613
18614 // Check if LSNode1 and LSNode2 have the same type and extension.
18615 if (LSNode1->getOpcode() == ISD::LOAD)
18616 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
18617             cast<LoadSDNode>(LSNode1)->getExtensionType())
18618           continue;
18619
18620 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
18621 continue;
18622
18623 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
18624
18625       // Check if the base pointer is the same for both instructions.
18626 if (Base1 != Base2)
18627 continue;
18628
18629 // Check if the offsets match the XTHeadMemPair encoding constraints.
18630 bool Valid = false;
18631 if (MemVT == MVT::i32) {
18632 // Check for adjacent i32 values and a 2-bit index.
18633 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
18634 Valid = true;
18635 } else if (MemVT == MVT::i64) {
18636 // Check for adjacent i64 values and a 2-bit index.
18637 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
18638 Valid = true;
18639 }
18640
18641 if (!Valid)
18642 continue;
18643
18644 // Try to combine.
18645 if (SDValue Res =
18646 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
18647 return Res;
18648 }
18649 }
18650
18651 return SDValue();
18652}
18653
18654// Fold
18655// (fp_to_int (froundeven X)) -> fcvt X, rne
18656// (fp_to_int (ftrunc X)) -> fcvt X, rtz
18657// (fp_to_int (ffloor X)) -> fcvt X, rdn
18658// (fp_to_int (fceil X)) -> fcvt X, rup
18659// (fp_to_int (fround X)) -> fcvt X, rmm
18660// (fp_to_int (frint X)) -> fcvt X
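// For example (illustrative only), on RV64 with the F extension:
//   (i64 (fp_to_sint (ffloor f32:X))) -> (FCVT_X X, rdn)
// which selects to a single fcvt with the rdn static rounding mode instead of
// a separate rounding operation followed by a conversion.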
18661 static SDValue performFP_TO_INTCombine(SDNode *N,
18662                                        TargetLowering::DAGCombinerInfo &DCI,
18663                                        const RISCVSubtarget &Subtarget) {
18664 SelectionDAG &DAG = DCI.DAG;
18665 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18666 MVT XLenVT = Subtarget.getXLenVT();
18667
18668 SDValue Src = N->getOperand(0);
18669
18670 // Don't do this for strict-fp Src.
18671 if (Src->isStrictFPOpcode())
18672 return SDValue();
18673
18674 // Ensure the FP type is legal.
18675 if (!TLI.isTypeLegal(Src.getValueType()))
18676 return SDValue();
18677
18678 // Don't do this for f16 with Zfhmin and not Zfh.
18679 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18680 return SDValue();
18681
18682 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18683 // If the result is invalid, we didn't find a foldable instruction.
18684 if (FRM == RISCVFPRndMode::Invalid)
18685 return SDValue();
18686
18687 SDLoc DL(N);
18688 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
18689 EVT VT = N->getValueType(0);
18690
18691 if (VT.isVector() && TLI.isTypeLegal(VT)) {
18692 MVT SrcVT = Src.getSimpleValueType();
18693 MVT SrcContainerVT = SrcVT;
18694 MVT ContainerVT = VT.getSimpleVT();
18695 SDValue XVal = Src.getOperand(0);
18696
18697 // For widening and narrowing conversions we just combine it into a
18698 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
18699 // end up getting lowered to their appropriate pseudo instructions based on
18700 // their operand types
18701 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
18702 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
18703 return SDValue();
18704
18705 // Make fixed-length vectors scalable first
18706 if (SrcVT.isFixedLengthVector()) {
18707 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
18708 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
18709 ContainerVT =
18710 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
18711 }
18712
18713 auto [Mask, VL] =
18714 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
18715
18716 SDValue FpToInt;
18717 if (FRM == RISCVFPRndMode::RTZ) {
18718 // Use the dedicated trunc static rounding mode if we're truncating so we
18719 // don't need to generate calls to fsrmi/fsrm
18720 unsigned Opc =
18721 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
18722 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
18723 } else {
18724 unsigned Opc =
18725 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
18726 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
18727 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
18728 }
18729
18730 // If converted from fixed-length to scalable, convert back
18731 if (VT.isFixedLengthVector())
18732 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
18733
18734 return FpToInt;
18735 }
18736
18737 // Only handle XLen or i32 types. Other types narrower than XLen will
18738 // eventually be legalized to XLenVT.
18739 if (VT != MVT::i32 && VT != XLenVT)
18740 return SDValue();
18741
18742 unsigned Opc;
18743 if (VT == XLenVT)
18744 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18745 else
18746 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18747
18748 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
18749 DAG.getTargetConstant(FRM, DL, XLenVT));
18750 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
18751}
18752
18753// Fold
18754// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
18755// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
18756// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
18757// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
18758// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
18759// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
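// For example (illustrative only), (i64 (fp_to_sint_sat (ftrunc X))) on RV64
// becomes (FCVT_X X, rtz) wrapped in a select that returns 0 when X is NaN,
// since fp_to_int_sat requires a zero result for NaN while the hardware
// conversion does not produce one.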
18760 static SDValue performFP_TO_INT_SATCombine(SDNode *N,
18761                                            TargetLowering::DAGCombinerInfo &DCI,
18762                                            const RISCVSubtarget &Subtarget) {
18763 SelectionDAG &DAG = DCI.DAG;
18764 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18765 MVT XLenVT = Subtarget.getXLenVT();
18766
18767 // Only handle XLen types. Other types narrower than XLen will eventually be
18768 // legalized to XLenVT.
18769 EVT DstVT = N->getValueType(0);
18770 if (DstVT != XLenVT)
18771 return SDValue();
18772
18773 SDValue Src = N->getOperand(0);
18774
18775 // Don't do this for strict-fp Src.
18776 if (Src->isStrictFPOpcode())
18777 return SDValue();
18778
18779 // Ensure the FP type is also legal.
18780 if (!TLI.isTypeLegal(Src.getValueType()))
18781 return SDValue();
18782
18783 // Don't do this for f16 with Zfhmin and not Zfh.
18784 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
18785 return SDValue();
18786
18787 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
18788
18789 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
18790 if (FRM == RISCVFPRndMode::Invalid)
18791 return SDValue();
18792
18793 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
18794
18795 unsigned Opc;
18796 if (SatVT == DstVT)
18797 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
18798 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
18799 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
18800 else
18801 return SDValue();
18802 // FIXME: Support other SatVTs by clamping before or after the conversion.
18803
18804 Src = Src.getOperand(0);
18805
18806 SDLoc DL(N);
18807 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
18808 DAG.getTargetConstant(FRM, DL, XLenVT));
18809
18810 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
18811 // extend.
18812 if (Opc == RISCVISD::FCVT_WU_RV64)
18813 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
18814
18815 // RISC-V FP-to-int conversions saturate to the destination register size, but
18816 // don't produce 0 for nan.
18817 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
18818 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
18819}
18820
18821// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
18822// smaller than XLenVT.
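// For example (illustrative only), with Zbkb on RV64:
//   (i16 (bitreverse (bswap X))) -> (BREV8 X)
// because swapping the bytes and then reversing all bits is the same as
// reversing the bits within each byte.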
18823 static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
18824                                         const RISCVSubtarget &Subtarget) {
18825 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
18826
18827 SDValue Src = N->getOperand(0);
18828 if (Src.getOpcode() != ISD::BSWAP)
18829 return SDValue();
18830
18831 EVT VT = N->getValueType(0);
18832 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
18833       !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
18834     return SDValue();
18835
18836 SDLoc DL(N);
18837 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
18838}
18839
18840 static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
18841                                         const RISCVSubtarget &Subtarget) {
18842 // Fold:
18843 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
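  // Illustrative sketch (hypothetical values): for a vp.load of
  // <vscale x 4 x i32> at address p with EVL e, the reversed result can be
  // loaded directly as
  //   vp.strided.load(p + (e - 1) * 4, stride = -4, mask, e)
  // i.e. starting at the last active element and stepping backwards.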
18844
18845 // Check if its first operand is a vp.load.
18846 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
18847 if (!VPLoad)
18848 return SDValue();
18849
18850 EVT LoadVT = VPLoad->getValueType(0);
18851 // We do not have a strided_load version for masks, and the evl of vp.reverse
18852 // and vp.load should always be the same.
18853 if (!LoadVT.getVectorElementType().isByteSized() ||
18854 N->getOperand(2) != VPLoad->getVectorLength() ||
18855 !N->getOperand(0).hasOneUse())
18856 return SDValue();
18857
18858   // Check if the mask of the outer vp.reverse is all ones.
18859 if (!isOneOrOneSplat(N->getOperand(1)))
18860 return SDValue();
18861
18862 SDValue LoadMask = VPLoad->getMask();
18863   // If Mask is all ones, then the load is unmasked and can be reversed.
18864 if (!isOneOrOneSplat(LoadMask)) {
18865 // If the mask is not all ones, we can reverse the load if the mask was also
18866 // reversed by an unmasked vp.reverse with the same EVL.
18867 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18868 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
18869 LoadMask.getOperand(2) != VPLoad->getVectorLength())
18870 return SDValue();
18871 LoadMask = LoadMask.getOperand(0);
18872 }
18873
18874 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
18875 SDLoc DL(N);
18876 MVT XLenVT = Subtarget.getXLenVT();
18877 SDValue NumElem = VPLoad->getVectorLength();
18878 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
18879
18880 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18881 DAG.getConstant(1, DL, XLenVT));
18882 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18883 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18884 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
18885 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18886
18888 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
18889   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
18890       PtrInfo, VPLoad->getMemOperand()->getFlags(),
18891 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
18892
18893 SDValue Ret = DAG.getStridedLoadVP(
18894 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
18895 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
18896
18897 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
18898
18899 return Ret;
18900}
18901
18902 static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
18903                                       const RISCVSubtarget &Subtarget) {
18904 // Fold:
18905 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
18906 // -1, MASK)
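  // Illustrative sketch (hypothetical values): storing a reversed
  // <vscale x 4 x i32> VAL with EVL e at ADDR is equivalent to a strided
  // store of the un-reversed VAL starting at ADDR + (e - 1) * 4 with stride
  // -4, so the explicit reverse disappears.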
18907 auto *VPStore = cast<VPStoreSDNode>(N);
18908
18909 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
18910 return SDValue();
18911
18912 SDValue VPReverse = VPStore->getValue();
18913 EVT ReverseVT = VPReverse->getValueType(0);
18914
18915 // We do not have a strided_store version for masks, and the evl of vp.reverse
18916 // and vp.store should always be the same.
18917 if (!ReverseVT.getVectorElementType().isByteSized() ||
18918 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
18919 !VPReverse.hasOneUse())
18920 return SDValue();
18921
18922 SDValue StoreMask = VPStore->getMask();
18923   // If Mask is all ones, then the store is unmasked and can be reversed.
18924 if (!isOneOrOneSplat(StoreMask)) {
18925 // If the mask is not all ones, we can reverse the store if the mask was
18926 // also reversed by an unmasked vp.reverse with the same EVL.
18927 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
18928 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
18929 StoreMask.getOperand(2) != VPStore->getVectorLength())
18930 return SDValue();
18931 StoreMask = StoreMask.getOperand(0);
18932 }
18933
18934 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
18935 SDLoc DL(N);
18936 MVT XLenVT = Subtarget.getXLenVT();
18937 SDValue NumElem = VPStore->getVectorLength();
18938 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
18939
18940 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
18941 DAG.getConstant(1, DL, XLenVT));
18942 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
18943 DAG.getConstant(ElemWidthByte, DL, XLenVT));
18944 SDValue Base =
18945 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
18946 SDValue Stride = DAG.getSignedConstant(-ElemWidthByte, DL, XLenVT);
18947
18949 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
18950   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
18951       PtrInfo, VPStore->getMemOperand()->getFlags(),
18952 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
18953
18954 return DAG.getStridedStoreVP(
18955 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
18956 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
18957 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
18958 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
18959}
18960
18961// Peephole avgceil pattern.
18962// %1 = zext <N x i8> %a to <N x i32>
18963// %2 = zext <N x i8> %b to <N x i32>
18964// %3 = add nuw nsw <N x i32> %1, splat (i32 1)
18965// %4 = add nuw nsw <N x i32> %3, %2
18966// %5 = lshr <N x i32> %4, splat (i32 1)
18967// %6 = trunc <N x i32> %5 to <N x i8>
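// Illustrative result (hypothetical operands): the sequence above collapses
// to a single (AVGCEILU_VL %a, %b, undef, Mask, VL) on the narrow i8 type,
// which is later selected as a vaaddu with the RNU (round-to-nearest-up)
// rounding mode.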
18968 static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG,
18969                                          const RISCVSubtarget &Subtarget) {
18970 EVT VT = N->getValueType(0);
18971
18972 // Ignore fixed vectors.
18973 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18974 if (!VT.isScalableVector() || !TLI.isTypeLegal(VT))
18975 return SDValue();
18976
18977 SDValue In = N->getOperand(0);
18978 SDValue Mask = N->getOperand(1);
18979 SDValue VL = N->getOperand(2);
18980
18981 // Input should be a vp_srl with same mask and VL.
18982 if (In.getOpcode() != ISD::VP_SRL || In.getOperand(2) != Mask ||
18983 In.getOperand(3) != VL)
18984 return SDValue();
18985
18986 // Shift amount should be 1.
18987 if (!isOneOrOneSplat(In.getOperand(1)))
18988 return SDValue();
18989
18990 // Shifted value should be a vp_add with same mask and VL.
18991 SDValue LHS = In.getOperand(0);
18992 if (LHS.getOpcode() != ISD::VP_ADD || LHS.getOperand(2) != Mask ||
18993 LHS.getOperand(3) != VL)
18994 return SDValue();
18995
18996 SDValue Operands[3];
18997
18998 // Matches another VP_ADD with same VL and Mask.
18999 auto FindAdd = [&](SDValue V, SDValue Other) {
19000 if (V.getOpcode() != ISD::VP_ADD || V.getOperand(2) != Mask ||
19001 V.getOperand(3) != VL)
19002 return false;
19003
19004 Operands[0] = Other;
19005 Operands[1] = V.getOperand(1);
19006 Operands[2] = V.getOperand(0);
19007 return true;
19008 };
19009
19010 // We need to find another VP_ADD in one of the operands.
19011 SDValue LHS0 = LHS.getOperand(0);
19012 SDValue LHS1 = LHS.getOperand(1);
19013 if (!FindAdd(LHS0, LHS1) && !FindAdd(LHS1, LHS0))
19014 return SDValue();
19015
19016 // Now we have three operands of two additions. Check that one of them is a
19017 // constant vector with ones.
19018 auto I = llvm::find_if(Operands,
19019 [](const SDValue &Op) { return isOneOrOneSplat(Op); });
19020 if (I == std::end(Operands))
19021 return SDValue();
19022   // We found a vector with ones, move it to the end of the Operands array.
19023 std::swap(*I, Operands[2]);
19024
19025 // Make sure the other 2 operands can be promoted from the result type.
19026 for (SDValue Op : drop_end(Operands)) {
19027 if (Op.getOpcode() != ISD::VP_ZERO_EXTEND || Op.getOperand(1) != Mask ||
19028 Op.getOperand(2) != VL)
19029 return SDValue();
19030 // Input must be the same size or smaller than our result.
19031 if (Op.getOperand(0).getScalarValueSizeInBits() > VT.getScalarSizeInBits())
19032 return SDValue();
19033 }
19034
19035 // Pattern is detected.
19036 // Rebuild the zero extends in case the inputs are smaller than our result.
19037 SDValue NewOp0 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[0]), VT,
19038 Operands[0].getOperand(0), Mask, VL);
19039 SDValue NewOp1 = DAG.getNode(ISD::VP_ZERO_EXTEND, SDLoc(Operands[1]), VT,
19040 Operands[1].getOperand(0), Mask, VL);
19041   // Build an AVGCEILU_VL which will be selected as a VAADDU with RNU rounding
19042 // mode.
19043 SDLoc DL(N);
19044 return DAG.getNode(RISCVISD::AVGCEILU_VL, DL, VT,
19045 {NewOp0, NewOp1, DAG.getUNDEF(VT), Mask, VL});
19046}
19047
19048// Convert from one FMA opcode to another based on whether we are negating the
19049// multiply result and/or the accumulator.
19050// NOTE: Only supports RVV operations with VL.
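// For example (illustrative only): negating only the multiply result of
// VFMADD_VL (a*b + c) yields -(a*b) + c, i.e. VFNMSUB_VL, while negating only
// the accumulator yields a*b - c, i.e. VFMSUB_VL.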
19051static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
19052 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
19053 if (NegMul) {
19054 // clang-format off
19055 switch (Opcode) {
19056 default: llvm_unreachable("Unexpected opcode");
19057 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
19058 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
19059 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
19060 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
19061 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
19062 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
19063 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
19064 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
19065 }
19066 // clang-format on
19067 }
19068
19069 // Negating the accumulator changes ADD<->SUB.
19070 if (NegAcc) {
19071 // clang-format off
19072 switch (Opcode) {
19073 default: llvm_unreachable("Unexpected opcode");
19074 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
19075 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
19076 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
19077 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
19078 case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
19079 case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
19080 case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
19081 case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
19082 }
19083 // clang-format on
19084 }
19085
19086 return Opcode;
19087}
19088
19089 static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
19090   // Fold FNEG_VL into FMA opcodes.
19091 // The first operand of strict-fp is chain.
19092 bool IsStrict =
19093 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
19094 unsigned Offset = IsStrict ? 1 : 0;
19095 SDValue A = N->getOperand(0 + Offset);
19096 SDValue B = N->getOperand(1 + Offset);
19097 SDValue C = N->getOperand(2 + Offset);
19098 SDValue Mask = N->getOperand(3 + Offset);
19099 SDValue VL = N->getOperand(4 + Offset);
19100
19101 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
19102 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
19103 V.getOperand(2) == VL) {
19104 // Return the negated input.
19105 V = V.getOperand(0);
19106 return true;
19107 }
19108
19109 return false;
19110 };
19111
19112 bool NegA = invertIfNegative(A);
19113 bool NegB = invertIfNegative(B);
19114 bool NegC = invertIfNegative(C);
19115
19116 // If no operands are negated, we're done.
19117 if (!NegA && !NegB && !NegC)
19118 return SDValue();
19119
19120 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
19121 if (IsStrict)
19122 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
19123 {N->getOperand(0), A, B, C, Mask, VL});
19124 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
19125 VL);
19126}
19127
19128 static SDValue performVFMADD_VLCombine(SDNode *N,
19129                                        TargetLowering::DAGCombinerInfo &DCI,
19130                                        const RISCVSubtarget &Subtarget) {
19131 SelectionDAG &DAG = DCI.DAG;
19132
19133   if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
19134     return V;
19135
19136 // FIXME: Ignore strict opcodes for now.
19137 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
19138 return SDValue();
19139
19140 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
19141}
19142
19143 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
19144                                  const RISCVSubtarget &Subtarget) {
19145 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
19146
19147 EVT VT = N->getValueType(0);
19148
19149 if (VT != Subtarget.getXLenVT())
19150 return SDValue();
19151
19152 if (!isa<ConstantSDNode>(N->getOperand(1)))
19153 return SDValue();
19154 uint64_t ShAmt = N->getConstantOperandVal(1);
19155
19156 SDValue N0 = N->getOperand(0);
19157
19158 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
19159 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
19160 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
19161 unsigned ExtSize =
19162 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
19163 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
19164 N0.getOperand(0).hasOneUse() &&
19165         isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
19166       uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
19167 if (LShAmt < ExtSize) {
19168 unsigned Size = VT.getSizeInBits();
19169 SDLoc ShlDL(N0.getOperand(0));
19170 SDValue Shl =
19171 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
19172 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
19173 SDLoc DL(N);
19174 return DAG.getNode(ISD::SRA, DL, VT, Shl,
19175 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
19176 }
19177 }
19178 }
19179
19180 if (ShAmt > 32 || VT != MVT::i64)
19181 return SDValue();
19182
19183 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
19184 // FIXME: Should this be a generic combine? There's a similar combine on X86.
19185 //
19186 // Also try these folds where an add or sub is in the middle.
19187   // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1)), C)
19188   // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X)), C)
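  // Worked example (illustrative only): with C = 8,
  //   (sra (shl X, 32), 24) -> (shl (sext_inreg X, i32), 8)
  // and when an add sits in the middle the constant is pre-shifted, e.g.
  //   (sra (add (shl X, 32), C1), 24)
  //     -> (shl (sext_inreg (add X, C1 >> 32), i32), 8)
  // which is valid because C1 has at least 32 trailing zeros.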
19189 SDValue Shl;
19190 ConstantSDNode *AddC = nullptr;
19191
19192 // We might have an ADD or SUB between the SRA and SHL.
19193 bool IsAdd = N0.getOpcode() == ISD::ADD;
19194 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
19195 // Other operand needs to be a constant we can modify.
19196 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
19197 if (!AddC)
19198 return SDValue();
19199
19200 // AddC needs to have at least 32 trailing zeros.
19201 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
19202 return SDValue();
19203
19204 // All users should be a shift by constant less than or equal to 32. This
19205 // ensures we'll do this optimization for each of them to produce an
19206 // add/sub+sext_inreg they can all share.
19207 for (SDNode *U : N0->users()) {
19208 if (U->getOpcode() != ISD::SRA ||
19209 !isa<ConstantSDNode>(U->getOperand(1)) ||
19210 U->getConstantOperandVal(1) > 32)
19211 return SDValue();
19212 }
19213
19214 Shl = N0.getOperand(IsAdd ? 0 : 1);
19215 } else {
19216 // Not an ADD or SUB.
19217 Shl = N0;
19218 }
19219
19220 // Look for a shift left by 32.
19221 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
19222 Shl.getConstantOperandVal(1) != 32)
19223 return SDValue();
19224
19225   // If we didn't look through an add/sub, then the shl should have one use.
19226 // If we did look through an add/sub, the sext_inreg we create is free so
19227 // we're only creating 2 new instructions. It's enough to only remove the
19228 // original sra+add/sub.
19229 if (!AddC && !Shl.hasOneUse())
19230 return SDValue();
19231
19232 SDLoc DL(N);
19233 SDValue In = Shl.getOperand(0);
19234
19235 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
19236 // constant.
19237 if (AddC) {
19238 SDValue ShiftedAddC =
19239 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
19240 if (IsAdd)
19241 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
19242 else
19243 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
19244 }
19245
19246 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
19247 DAG.getValueType(MVT::i32));
19248 if (ShAmt == 32)
19249 return SExt;
19250
19251 return DAG.getNode(
19252 ISD::SHL, DL, MVT::i64, SExt,
19253 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
19254}
19255
19256// Invert (and/or (set cc X, Y), (xor Z, 1)) to (or/and (set !cc X, Y)), Z) if
19257// the result is used as the condition of a br_cc or select_cc we can invert,
19258// inverting the setcc is free, and Z is 0/1. Caller will invert the
19259// br_cc/select_cc.
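// For example (illustrative only), with Z known to be 0/1:
//   (and (setcc X, Y, eq), (xor Z, 1)) -> (or (setcc X, Y, ne), Z)
// and the caller then inverts the br_cc/select_cc condition, so the xor
// costs no extra instruction.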
19260 static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
19261   bool IsAnd = Cond.getOpcode() == ISD::AND;
19262 if (!IsAnd && Cond.getOpcode() != ISD::OR)
19263 return SDValue();
19264
19265 if (!Cond.hasOneUse())
19266 return SDValue();
19267
19268 SDValue Setcc = Cond.getOperand(0);
19269 SDValue Xor = Cond.getOperand(1);
19270 // Canonicalize setcc to LHS.
19271 if (Setcc.getOpcode() != ISD::SETCC)
19272 std::swap(Setcc, Xor);
19273 // LHS should be a setcc and RHS should be an xor.
19274 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
19275 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
19276 return SDValue();
19277
19278 // If the condition is an And, SimplifyDemandedBits may have changed
19279 // (xor Z, 1) to (not Z).
19280 SDValue Xor1 = Xor.getOperand(1);
19281 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
19282 return SDValue();
19283
19284 EVT VT = Cond.getValueType();
19285 SDValue Xor0 = Xor.getOperand(0);
19286
19287 // The LHS of the xor needs to be 0/1.
19288   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
19289   if (!DAG.MaskedValueIsZero(Xor0, Mask))
19290 return SDValue();
19291
19292 // We can only invert integer setccs.
19293 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
19294 if (!SetCCOpVT.isScalarInteger())
19295 return SDValue();
19296
19297 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
19298 if (ISD::isIntEqualitySetCC(CCVal)) {
19299 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
19300 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
19301 Setcc.getOperand(1), CCVal);
19302 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
19303 // Invert (setlt 0, X) by converting to (setlt X, 1).
19304 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
19305 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
19306 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
19307     // Invert (setlt X, 1) by converting to (setlt 0, X).
19308 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
19309 DAG.getConstant(0, SDLoc(Setcc), VT),
19310 Setcc.getOperand(0), CCVal);
19311 } else
19312 return SDValue();
19313
19314 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
19315 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
19316}
19317
19318// Perform common combines for BR_CC and SELECT_CC conditions.
19319static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
19320 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
19321 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
19322
19323   // Since an arithmetic right shift always preserves the sign bit, the
19324   // shift can be omitted when comparing against zero.
19325 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
19326 // setge (sra X, N), 0 -> setge X, 0
19327 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
19328 LHS.getOpcode() == ISD::SRA) {
19329 LHS = LHS.getOperand(0);
19330 return true;
19331 }
19332
19333 if (!ISD::isIntEqualitySetCC(CCVal))
19334 return false;
19335
19336 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
19337 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
19338 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
19339 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
19340 // If we're looking for eq 0 instead of ne 0, we need to invert the
19341 // condition.
19342 bool Invert = CCVal == ISD::SETEQ;
19343 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
19344 if (Invert)
19345 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
19346
19347 RHS = LHS.getOperand(1);
19348 LHS = LHS.getOperand(0);
19349 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG, Subtarget);
19350
19351 CC = DAG.getCondCode(CCVal);
19352 return true;
19353 }
19354
19355 // If XOR is reused and has an immediate that will fit in XORI,
19356 // do not fold.
19357 auto isXorImmediate = [](const SDValue &Op) -> bool {
19358 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(Op))
19359 return isInt<12>(XorCnst->getSExtValue());
19360 return false;
19361 };
19362 // Fold (X(i1) ^ 1) == 0 -> X != 0
19363 auto singleBitOp = [&DAG](const SDValue &VarOp,
19364 const SDValue &ConstOp) -> bool {
19365 if (const auto *XorCnst = dyn_cast<ConstantSDNode>(ConstOp)) {
19366 const APInt Mask = APInt::getBitsSetFrom(VarOp.getValueSizeInBits(), 1);
19367 return (XorCnst->getSExtValue() == 1) &&
19368 DAG.MaskedValueIsZero(VarOp, Mask);
19369 }
19370 return false;
19371 };
19372 auto onlyUsedBySelectOrBR = [](const SDValue &Op) -> bool {
19373 for (const SDNode *UserNode : Op->users()) {
19374 const unsigned Opcode = UserNode->getOpcode();
19375 if (Opcode != RISCVISD::SELECT_CC && Opcode != RISCVISD::BR_CC)
19376 return false;
19377 }
19378 return true;
19379 };
19380 auto isFoldableXorEq = [isXorImmediate, singleBitOp, onlyUsedBySelectOrBR](
19381 const SDValue &LHS, const SDValue &RHS) -> bool {
19382 return LHS.getOpcode() == ISD::XOR && isNullConstant(RHS) &&
19383 (!isXorImmediate(LHS.getOperand(1)) ||
19384 singleBitOp(LHS.getOperand(0), LHS.getOperand(1)) ||
19385 onlyUsedBySelectOrBR(LHS));
19386 };
19387 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
19388 if (isFoldableXorEq(LHS, RHS)) {
19389 RHS = LHS.getOperand(1);
19390 LHS = LHS.getOperand(0);
19391 return true;
19392 }
19393 // Fold ((sext (xor X, C)), 0, eq/ne) -> ((sext(X), C, eq/ne)
19394 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG) {
19395 const SDValue LHS0 = LHS.getOperand(0);
19396 if (isFoldableXorEq(LHS0, RHS) && isa<ConstantSDNode>(LHS0.getOperand(1))) {
19397 // SEXT(XOR(X, Y)) -> XOR(SEXT(X), SEXT(Y)))
19398 RHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
19399 LHS0.getOperand(1), LHS.getOperand(1));
19400 LHS = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(),
19401 LHS0.getOperand(0), LHS.getOperand(1));
19402 return true;
19403 }
19404 }
19405
19406 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
19407 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
19408 LHS.getOperand(1).getOpcode() == ISD::Constant) {
19409 SDValue LHS0 = LHS.getOperand(0);
19410 if (LHS0.getOpcode() == ISD::AND &&
19411 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
19412 uint64_t Mask = LHS0.getConstantOperandVal(1);
19413 uint64_t ShAmt = LHS.getConstantOperandVal(1);
19414 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
19415 // XAndesPerf supports branch on test bit.
19416 if (Subtarget.hasVendorXAndesPerf()) {
19417 LHS =
19418 DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS0.getOperand(0),
19419 DAG.getConstant(Mask, DL, LHS.getValueType()));
19420 return true;
19421 }
19422
19423 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
19424 CC = DAG.getCondCode(CCVal);
19425
19426 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
19427 LHS = LHS0.getOperand(0);
19428 if (ShAmt != 0)
19429 LHS =
19430 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
19431 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
19432 return true;
19433 }
19434 }
19435 }
19436
19437   // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
19438 // This can occur when legalizing some floating point comparisons.
19439 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
19440 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
19441 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
19442 CC = DAG.getCondCode(CCVal);
19443 RHS = DAG.getConstant(0, DL, LHS.getValueType());
19444 return true;
19445 }
19446
19447 if (isNullConstant(RHS)) {
19448 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
19449 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
19450 CC = DAG.getCondCode(CCVal);
19451 LHS = NewCond;
19452 return true;
19453 }
19454 }
19455
19456 return false;
19457}
19458
19459// Fold
19460// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
19461// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
19462// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
19463// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
19464// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
19465// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
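// For example (illustrative only):
//   (select C, (add Y, X), Y) -> (add Y, (select C, X, 0))
// lets the inner select be lowered to a single conditional-zero of X (e.g.
// czero.eqz when Zicond is available), leaving one unconditional add.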
19466 static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
19467                                    SDValue TrueVal, SDValue FalseVal,
19468 bool Swapped) {
19469 bool Commutative = true;
19470 unsigned Opc = TrueVal.getOpcode();
19471 switch (Opc) {
19472 default:
19473 return SDValue();
19474 case ISD::SHL:
19475 case ISD::SRA:
19476 case ISD::SRL:
19477 case ISD::SUB:
19478 case ISD::ROTL:
19479 case ISD::ROTR:
19480 Commutative = false;
19481 break;
19482 case ISD::ADD:
19483 case ISD::OR:
19484 case ISD::XOR:
19485 case ISD::UMIN:
19486 case ISD::UMAX:
19487 break;
19488 }
19489
19490 if (!TrueVal.hasOneUse())
19491 return SDValue();
19492
19493 unsigned OpToFold;
19494 if (FalseVal == TrueVal.getOperand(0))
19495 OpToFold = 0;
19496 else if (Commutative && FalseVal == TrueVal.getOperand(1))
19497 OpToFold = 1;
19498 else
19499 return SDValue();
19500
19501 EVT VT = N->getValueType(0);
19502 SDLoc DL(N);
19503 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
19504 EVT OtherOpVT = OtherOp.getValueType();
19505 SDValue IdentityOperand =
19506 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
19507 if (!Commutative)
19508 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
19509 assert(IdentityOperand && "No identity operand!");
19510
19511 if (Swapped)
19512 std::swap(OtherOp, IdentityOperand);
19513 SDValue NewSel =
19514 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
19515 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
19516}
19517
19518// This tries to get rid of `select` and `icmp` that are being used to handle
19519// `Targets` that do not support `cttz(0)`/`ctlz(0)`.
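// For example (illustrative only), for i32:
//   (select (X == 0), 0, (cttz X)) -> (and (cttz X), 31)
// because ISD::CTTZ is defined to return the bit width for a zero input, and
// masking with BitWidth-1 maps that result back to 0.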
19520 static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
19521   SDValue Cond = N->getOperand(0);
19522
19523 // This represents either CTTZ or CTLZ instruction.
19524 SDValue CountZeroes;
19525
19526 SDValue ValOnZero;
19527
19528 if (Cond.getOpcode() != ISD::SETCC)
19529 return SDValue();
19530
19531 if (!isNullConstant(Cond->getOperand(1)))
19532 return SDValue();
19533
19534 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
19535 if (CCVal == ISD::CondCode::SETEQ) {
19536 CountZeroes = N->getOperand(2);
19537 ValOnZero = N->getOperand(1);
19538 } else if (CCVal == ISD::CondCode::SETNE) {
19539 CountZeroes = N->getOperand(1);
19540 ValOnZero = N->getOperand(2);
19541 } else {
19542 return SDValue();
19543 }
19544
19545 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
19546 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
19547 CountZeroes = CountZeroes.getOperand(0);
19548
19549 if (CountZeroes.getOpcode() != ISD::CTTZ &&
19550 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
19551 CountZeroes.getOpcode() != ISD::CTLZ &&
19552 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
19553 return SDValue();
19554
19555 if (!isNullConstant(ValOnZero))
19556 return SDValue();
19557
19558 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
19559 if (Cond->getOperand(0) != CountZeroesArgument)
19560 return SDValue();
19561
19562 unsigned BitWidth = CountZeroes.getValueSizeInBits();
19563 if (!isPowerOf2_32(BitWidth))
19564 return SDValue();
19565
19566 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
19567 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
19568 CountZeroes.getValueType(), CountZeroesArgument);
19569 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
19570 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
19571 CountZeroes.getValueType(), CountZeroesArgument);
19572 }
19573
19574 SDValue BitWidthMinusOne =
19575 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
19576
19577 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
19578 CountZeroes, BitWidthMinusOne);
19579 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
19580}
19581
19582 static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
19583                                 const RISCVSubtarget &Subtarget) {
19584 SDValue Cond = N->getOperand(0);
19585 SDValue True = N->getOperand(1);
19586 SDValue False = N->getOperand(2);
19587 SDLoc DL(N);
19588 EVT VT = N->getValueType(0);
19589 EVT CondVT = Cond.getValueType();
19590
19591 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
19592 return SDValue();
19593
19594   // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
19595   // BEXTI, where C is a power of 2.
19596 if (Subtarget.hasBEXTILike() && VT.isScalarInteger() &&
19597 (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
19598 SDValue LHS = Cond.getOperand(0);
19599 SDValue RHS = Cond.getOperand(1);
19600 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
19601 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
19602 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
19603 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
19604 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
19605 return DAG.getSelect(DL, VT,
19606 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
19607 False, True);
19608 }
19609 }
19610 return SDValue();
19611}
19612
19613static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC) {
19614 if (!TrueVal.hasOneUse() || !FalseVal.hasOneUse())
19615 return false;
19616
19617 SwapCC = false;
19618 if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::ADD) {
19619 std::swap(TrueVal, FalseVal);
19620 SwapCC = true;
19621 }
19622
19623 if (TrueVal.getOpcode() != ISD::ADD || FalseVal.getOpcode() != ISD::SUB)
19624 return false;
19625
19626 SDValue A = FalseVal.getOperand(0);
19627 SDValue B = FalseVal.getOperand(1);
19628 // Add is commutative, so check both orders
19629 return ((TrueVal.getOperand(0) == A && TrueVal.getOperand(1) == B) ||
19630 (TrueVal.getOperand(1) == A && TrueVal.getOperand(0) == B));
19631}
19632
19633/// Convert vselect CC, (add a, b), (sub a, b) to add a, (vselect CC, -b, b).
19634 /// This allows us to match a vadd.vv fed by a masked vrsub, which reduces
19635/// register pressure over the add followed by masked vsub sequence.
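// For example (illustrative only), for <vscale x 4 x i32> values,
//   (vselect CC, (add a, b), (sub a, b))
// becomes (add a, B') where B' is b conditionally negated under the inverted
// mask via a masked vrsub.vi with immediate 0, so the add itself no longer
// needs a mask.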
19636 static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
19637   SDLoc DL(N);
19638 EVT VT = N->getValueType(0);
19639 SDValue CC = N->getOperand(0);
19640 SDValue TrueVal = N->getOperand(1);
19641 SDValue FalseVal = N->getOperand(2);
19642
19643 bool SwapCC;
19644 if (!matchSelectAddSub(TrueVal, FalseVal, SwapCC))
19645 return SDValue();
19646
19647 SDValue Sub = SwapCC ? TrueVal : FalseVal;
19648 SDValue A = Sub.getOperand(0);
19649 SDValue B = Sub.getOperand(1);
19650
19651 // Arrange the select such that we can match a masked
19652 // vrsub.vi to perform the conditional negate
19653 SDValue NegB = DAG.getNegative(B, DL, VT);
19654 if (!SwapCC)
19655 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
19656 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
19657 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
19658}
19659
19660 static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
19661                                     const RISCVSubtarget &Subtarget) {
19662 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
19663 return Folded;
19664
19665 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
19666 return V;
19667
19668 if (Subtarget.hasConditionalMoveFusion())
19669 return SDValue();
19670
19671 SDValue TrueVal = N->getOperand(1);
19672 SDValue FalseVal = N->getOperand(2);
19673 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
19674 return V;
19675 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
19676}
19677
19678/// If we have a build_vector where each lane is binop X, C, where C
19679/// is a constant (but not necessarily the same constant on all lanes),
19680/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
19681/// We assume that materializing a constant build vector will be no more
19682 /// expensive than performing O(n) binops.
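// For example (illustrative only):
//   build_vector (add x0, 1), (add x1, 2), (add x2, 3), (add x3, 4)
// becomes
//   add (build_vector x0, x1, x2, x3), (build_vector 1, 2, 3, 4)
// turning the scalar adds into one vector add plus a materialized constant
// vector.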
19683 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
19684                                           const RISCVSubtarget &Subtarget,
19685 const RISCVTargetLowering &TLI) {
19686 SDLoc DL(N);
19687 EVT VT = N->getValueType(0);
19688
19689 assert(!VT.isScalableVector() && "unexpected build vector");
19690
19691 if (VT.getVectorNumElements() == 1)
19692 return SDValue();
19693
19694 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
19695 if (!TLI.isBinOp(Opcode))
19696 return SDValue();
19697
19698 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
19699 return SDValue();
19700
19701 // This BUILD_VECTOR involves an implicit truncation, and sinking
19702 // truncates through binops is non-trivial.
19703 if (N->op_begin()->getValueType() != VT.getVectorElementType())
19704 return SDValue();
19705
19706 SmallVector<SDValue> LHSOps;
19707 SmallVector<SDValue> RHSOps;
19708 for (SDValue Op : N->ops()) {
19709 if (Op.isUndef()) {
19710 // We can't form a divide or remainder from undef.
19711 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
19712 return SDValue();
19713
19714 LHSOps.push_back(Op);
19715 RHSOps.push_back(Op);
19716 continue;
19717 }
19718
19719     // TODO: We can handle operations which have a neutral rhs value
19720 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
19721 // of profit in a more explicit manner.
19722 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
19723 return SDValue();
19724
19725 LHSOps.push_back(Op.getOperand(0));
19726 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
19727 !isa<ConstantFPSDNode>(Op.getOperand(1)))
19728 return SDValue();
19729 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19730 // have different LHS and RHS types.
19731 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
19732 return SDValue();
19733
19734 RHSOps.push_back(Op.getOperand(1));
19735 }
19736
19737 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
19738 DAG.getBuildVector(VT, DL, RHSOps));
19739}
19740
19741 static MVT getQDOTXResultType(MVT OpVT) {
19742   ElementCount OpEC = OpVT.getVectorElementCount();
19743 assert(OpEC.isKnownMultipleOf(4) && OpVT.getVectorElementType() == MVT::i8);
19744 return MVT::getVectorVT(MVT::i32, OpEC.divideCoefficientBy(4));
19745}
19746
19747/// Given fixed length vectors A and B with equal element types, but possibly
19748/// different number of elements, return A + B where either A or B is zero
19749/// padded to the larger number of elements.
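// For example (illustrative only), with A : v4i32 and B : v8i32, the result
// is B with its low four lanes replaced by (A + B[0..3]), which equals
// (zero-padded A) + B without ever materializing the padded vector.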
19750 static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B,
19751                                 SelectionDAG &DAG) {
19752 // NOTE: Manually doing the extract/add/insert scheme produces
19753 // significantly better codegen than the naive pad with zeros
19754 // and add scheme.
19755 EVT AVT = A.getValueType();
19756 EVT BVT = B.getValueType();
19758   if (AVT.getVectorMinNumElements() > BVT.getVectorMinNumElements()) {
19759     std::swap(A, B);
19760 std::swap(AVT, BVT);
19761 }
19762
19763 SDValue BPart = DAG.getExtractSubvector(DL, AVT, B, 0);
19764 SDValue Res = DAG.getNode(ISD::ADD, DL, AVT, A, BPart);
19765 return DAG.getInsertSubvector(DL, B, Res, 0);
19766}
19767
19768 static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
19769                                          SelectionDAG &DAG,
19770 const RISCVSubtarget &Subtarget,
19771 const RISCVTargetLowering &TLI) {
19772 using namespace SDPatternMatch;
19773 // Note: We intentionally do not check the legality of the reduction type.
19774 // We want to handle the m4/m8 *src* types, and thus need to let illegal
19775 // intermediate types flow through here.
19776 if (InVec.getValueType().getVectorElementType() != MVT::i32 ||
19777       !InVec.getValueType().getVectorElementCount().isKnownMultipleOf(4))
19778     return SDValue();
19779
19780 // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
19781 // form).
19782 SDValue A, B;
19783 if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
19784 SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
19785 SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
19786 if (AOpt || BOpt) {
19787 if (AOpt)
19788 A = AOpt;
19789 if (BOpt)
19790 B = BOpt;
19791 // From here, we're doing A + B with mixed types, implicitly zero
19792 // padded to the wider type. Note that we *don't* need the result
19793 // type to be the original VT, and in fact prefer narrower ones
19794 // if possible.
19795 return getZeroPaddedAdd(DL, A, B, DAG);
19796 }
19797 }
19798
19799 // zext a <--> partial_reduce_umla 0, a, 1
19800 // sext a <--> partial_reduce_smla 0, a, 1
19801 if (InVec.getOpcode() == ISD::ZERO_EXTEND ||
19802 InVec.getOpcode() == ISD::SIGN_EXTEND) {
19803 SDValue A = InVec.getOperand(0);
19804 EVT OpVT = A.getValueType();
19805 if (OpVT.getVectorElementType() != MVT::i8 || !TLI.isTypeLegal(OpVT))
19806 return SDValue();
19807
19808 MVT ResVT = getQDOTXResultType(A.getSimpleValueType());
19809 SDValue B = DAG.getConstant(0x1, DL, OpVT);
19810 bool IsSigned = InVec.getOpcode() == ISD::SIGN_EXTEND;
19811 unsigned Opc =
19812         IsSigned ? ISD::PARTIAL_REDUCE_SMLA : ISD::PARTIAL_REDUCE_UMLA;
19813     return DAG.getNode(Opc, DL, ResVT, {DAG.getConstant(0, DL, ResVT), A, B});
19814 }
19815
19816 // mul (sext a, sext b) -> partial_reduce_smla 0, a, b
19817 // mul (zext a, zext b) -> partial_reduce_umla 0, a, b
19818 // mul (sext a, zext b) -> partial_reduce_ssmla 0, a, b
19819 // mul (zext a, sext b) -> partial_reduce_smla 0, b, a (swapped)
19820 if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
19821 return SDValue();
19822
19823 if (!ISD::isExtOpcode(A.getOpcode()))
19824 return SDValue();
19825
19826 EVT OpVT = A.getOperand(0).getValueType();
19827 if (OpVT.getVectorElementType() != MVT::i8 ||
19828 OpVT != B.getOperand(0).getValueType() ||
19829 !TLI.isTypeLegal(A.getValueType()))
19830 return SDValue();
19831
19832 unsigned Opc;
19833 if (A.getOpcode() == ISD::SIGN_EXTEND && B.getOpcode() == ISD::SIGN_EXTEND)
19834     Opc = ISD::PARTIAL_REDUCE_SMLA;
19835   else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19836 B.getOpcode() == ISD::ZERO_EXTEND)
19837     Opc = ISD::PARTIAL_REDUCE_UMLA;
19838   else if (A.getOpcode() == ISD::SIGN_EXTEND &&
19839 B.getOpcode() == ISD::ZERO_EXTEND)
19840     Opc = ISD::PARTIAL_REDUCE_SUMLA;
19841   else if (A.getOpcode() == ISD::ZERO_EXTEND &&
19842 B.getOpcode() == ISD::SIGN_EXTEND) {
19843     Opc = ISD::PARTIAL_REDUCE_SUMLA;
19844     std::swap(A, B);
19845 } else
19846 return SDValue();
19847
19848 MVT ResVT = getQDOTXResultType(OpVT.getSimpleVT());
19849 return DAG.getNode(
19850 Opc, DL, ResVT,
19851 {DAG.getConstant(0, DL, ResVT), A.getOperand(0), B.getOperand(0)});
19852}
19853
19854 static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG,
19855                                        const RISCVSubtarget &Subtarget,
19856 const RISCVTargetLowering &TLI) {
19857 if (!Subtarget.hasStdExtZvqdotq())
19858 return SDValue();
19859
19860 SDLoc DL(N);
19861 EVT VT = N->getValueType(0);
19862 SDValue InVec = N->getOperand(0);
19863 if (SDValue V = foldReduceOperandViaVQDOT(InVec, DL, DAG, Subtarget, TLI))
19864 return DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, V);
19865 return SDValue();
19866}
19867
19868 static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
19869                                                const RISCVSubtarget &Subtarget,
19870 const RISCVTargetLowering &TLI) {
19871 SDValue InVec = N->getOperand(0);
19872 SDValue InVal = N->getOperand(1);
19873 SDValue EltNo = N->getOperand(2);
19874 SDLoc DL(N);
19875
19876 EVT VT = InVec.getValueType();
19877 if (VT.isScalableVector())
19878 return SDValue();
19879
19880 if (!InVec.hasOneUse())
19881 return SDValue();
19882
19883 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
19884 // move the insert_vector_elts into the arms of the binop. Note that
19885 // the new RHS must be a constant.
19886 const unsigned InVecOpcode = InVec->getOpcode();
19887 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
19888 InVal.hasOneUse()) {
19889 SDValue InVecLHS = InVec->getOperand(0);
19890 SDValue InVecRHS = InVec->getOperand(1);
19891 SDValue InValLHS = InVal->getOperand(0);
19892 SDValue InValRHS = InVal->getOperand(1);
19893
19894     if (!isa<ConstantSDNode>(InVecRHS) && !isa<ConstantFPSDNode>(InVecRHS))
19895       return SDValue();
19896 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
19897 return SDValue();
19898 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
19899 // have different LHS and RHS types.
19900 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
19901 return SDValue();
19902     SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19903                               InVecLHS, InValLHS, EltNo);
19904     SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19905                               InVecRHS, InValRHS, EltNo);
19906 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
19907 }
19908
19909 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
19910 // move the insert_vector_elt to the source operand of the concat_vector.
19911 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
19912 return SDValue();
19913
19914 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19915 if (!IndexC)
19916 return SDValue();
19917 unsigned Elt = IndexC->getZExtValue();
19918
19919 EVT ConcatVT = InVec.getOperand(0).getValueType();
19920 if (ConcatVT.getVectorElementType() != InVal.getValueType())
19921 return SDValue();
19922 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19923 unsigned NewIdx = Elt % ConcatNumElts;
19924
19925 unsigned ConcatOpIdx = Elt / ConcatNumElts;
19926 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
19927 ConcatOp = DAG.getInsertVectorElt(DL, ConcatOp, InVal, NewIdx);
19928
19929 SmallVector<SDValue> ConcatOps(InVec->ops());
19930 ConcatOps[ConcatOpIdx] = ConcatOp;
19931 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19932}
19933
19934// If we're concatenating a series of vector loads like
19935// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
19936// Then we can turn this into a strided load by widening the vector elements
19937// vlse32 p, stride=n
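// For example (illustrative only), with v4i8 pieces and n = 8:
//   concat_vectors (load v4i8, p), (load v4i8, p+8), (load v4i8, p+16), ...
// widens each v4i8 chunk to a single i32 element and becomes one strided
// load with base p and stride 8 (the vlse32 shown above).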
19938 static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
19939                                             const RISCVSubtarget &Subtarget,
19940 const RISCVTargetLowering &TLI) {
19941 SDLoc DL(N);
19942 EVT VT = N->getValueType(0);
19943
19944 // Only perform this combine on legal MVTs.
19945 if (!TLI.isTypeLegal(VT))
19946 return SDValue();
19947
19948 // TODO: Potentially extend this to scalable vectors
19949 if (VT.isScalableVector())
19950 return SDValue();
19951
19952 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
19953 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
19954 !SDValue(BaseLd, 0).hasOneUse())
19955 return SDValue();
19956
19957 EVT BaseLdVT = BaseLd->getValueType(0);
19958
19959 // Go through the loads and check that they're strided
19960   SmallVector<LoadSDNode *> Lds;
19961   Lds.push_back(BaseLd);
19962 Align Align = BaseLd->getAlign();
19963 for (SDValue Op : N->ops().drop_front()) {
19964 auto *Ld = dyn_cast<LoadSDNode>(Op);
19965 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
19966 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
19967 Ld->getValueType(0) != BaseLdVT)
19968 return SDValue();
19969
19970 Lds.push_back(Ld);
19971
19972 // The common alignment is the most restrictive (smallest) of all the loads
19973 Align = std::min(Align, Ld->getAlign());
19974 }
19975
19976 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
19977 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
19978 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
19979 // If the load ptrs can be decomposed into a common (Base + Index) with a
19980 // common constant stride, then return the constant stride.
19981 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
19982 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
19983 if (BIO1.equalBaseIndex(BIO2, DAG))
19984 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
19985
19986 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
19987 SDValue P1 = Ld1->getBasePtr();
19988 SDValue P2 = Ld2->getBasePtr();
19989 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
19990 return {{P2.getOperand(1), false}};
19991 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
19992 return {{P1.getOperand(1), true}};
19993
19994 return std::nullopt;
19995 };
19996
19997 // Get the distance between the first and second loads
19998 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
19999 if (!BaseDiff)
20000 return SDValue();
20001
20002 // Check all the loads are the same distance apart
20003 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
20004 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
20005 return SDValue();
20006
20007 // TODO: At this point, we've successfully matched a generalized gather
20008 // load. Maybe we should emit that, and then move the specialized
20009 // matchers above and below into a DAG combine?
20010
20011 // Get the widened scalar type, e.g. v4i8 -> i64
20012 unsigned WideScalarBitWidth =
20013 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
20014 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
20015
20016 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
20017 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
20018 if (!TLI.isTypeLegal(WideVecVT))
20019 return SDValue();
20020
20021 // Check that the operation is legal
20022 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
20023 return SDValue();
20024
20025 auto [StrideVariant, MustNegateStride] = *BaseDiff;
20026 SDValue Stride =
20027 std::holds_alternative<SDValue>(StrideVariant)
20028 ? std::get<SDValue>(StrideVariant)
20029 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
20030 Lds[0]->getOffset().getValueType());
20031 if (MustNegateStride)
20032 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
20033
20034 SDValue AllOneMask =
20035 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
20036 DAG.getConstant(1, DL, MVT::i1));
20037
20038 uint64_t MemSize;
20039 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
20040 ConstStride && ConstStride->getSExtValue() >= 0)
20041 // total size = (elsize * n) + (stride - elsize) * (n-1)
20042 // = elsize + stride * (n-1)
20043 MemSize = WideScalarVT.getSizeInBits() +
20044 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
20045 else
20046 // If Stride isn't constant, then we can't know how much it will load
20047 MemSize = MemoryLocation::UnknownSize;
20048
20049 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
20050 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
20051 Align);
20052
20053 SDValue StridedLoad = DAG.getStridedLoadVP(
20054 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
20055 AllOneMask,
20056 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
20057
20058 for (SDValue Ld : N->ops())
20059 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
20060
20061 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
20062}
20063
20064 static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
20065 const RISCVSubtarget &Subtarget,
20066 const RISCVTargetLowering &TLI) {
20067 SDLoc DL(N);
20068 EVT VT = N->getValueType(0);
20069 const unsigned ElementSize = VT.getScalarSizeInBits();
20070 const unsigned NumElts = VT.getVectorNumElements();
20071 SDValue V1 = N->getOperand(0);
20072 SDValue V2 = N->getOperand(1);
20073 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
20074 MVT XLenVT = Subtarget.getXLenVT();
20075
20076 // Recognize a disguised select of add/sub.
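// For example, a select-mask shuffle of (add A, B) and (sub A, B) becomes
// (add A, (vselect CC, (neg B), B)), i.e. a masked vrsub.vi followed by a
// vadd.vv.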
20077 bool SwapCC;
20078 if (ShuffleVectorInst::isSelectMask(Mask, NumElts) &&
20079 matchSelectAddSub(V1, V2, SwapCC)) {
20080 SDValue Sub = SwapCC ? V1 : V2;
20081 SDValue A = Sub.getOperand(0);
20082 SDValue B = Sub.getOperand(1);
20083
20084 SmallVector<SDValue> MaskVals;
20085 for (int MaskIndex : Mask) {
20086 bool SelectMaskVal = (MaskIndex < (int)NumElts);
20087 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
20088 }
20089 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
20090 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
20091 SDValue CC = DAG.getBuildVector(MaskVT, DL, MaskVals);
20092
20093 // Arrange the select such that we can match a masked
20094 // vrsub.vi to perform the conditional negate
20095 SDValue NegB = DAG.getNegative(B, DL, VT);
20096 if (!SwapCC)
20097 CC = DAG.getLogicalNOT(DL, CC, CC->getValueType(0));
20098 SDValue NewB = DAG.getNode(ISD::VSELECT, DL, VT, CC, NegB, B);
20099 return DAG.getNode(ISD::ADD, DL, VT, A, NewB);
20100 }
20101
20102 // Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
20103 // during the combine phase before type legalization, and relies on
20104 // DAGCombine not undoing the transform if isShuffleMaskLegal returns false
20105 // for the source mask.
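// For example, a v2i128 shuffle with mask <1, 0> becomes a v4i64 shuffle
// with mask <2, 3, 0, 1> (assuming ELEN=64); wider element types such as
// i256 are narrowed by repeated application of this combine.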
20106 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
20107 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
20108 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
20109 return SDValue();
20110
20111 SmallVector<int, 8> NewMask;
20112 narrowShuffleMaskElts(2, Mask, NewMask);
20113
20114 LLVMContext &C = *DAG.getContext();
20115 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
20116 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
20117 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
20118 DAG.getBitcast(NewVT, V2), NewMask);
20119 return DAG.getBitcast(VT, Res);
20120}
20121
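// Fold (add (vwmul/vwmulu/vwmulsu x, y), z) into the corresponding
// vwmacc/vwmaccu/vwmaccsu node, provided the multiply's mask and VL match
// the add's and the passthru operands are undef.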
20122 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
20123 const RISCVSubtarget &Subtarget) {
20124 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
20125
20126 if (N->getValueType(0).isFixedLengthVector())
20127 return SDValue();
20128
20129 SDValue Addend = N->getOperand(0);
20130 SDValue MulOp = N->getOperand(1);
20131
20132 if (N->getOpcode() == RISCVISD::ADD_VL) {
20133 SDValue AddPassthruOp = N->getOperand(2);
20134 if (!AddPassthruOp.isUndef())
20135 return SDValue();
20136 }
20137
20138 auto IsVWMulOpc = [](unsigned Opc) {
20139 switch (Opc) {
20140 case RISCVISD::VWMUL_VL:
20141 case RISCVISD::VWMULU_VL:
20142 case RISCVISD::VWMULSU_VL:
20143 return true;
20144 default:
20145 return false;
20146 }
20147 };
20148
20149 if (!IsVWMulOpc(MulOp.getOpcode()))
20150 std::swap(Addend, MulOp);
20151
20152 if (!IsVWMulOpc(MulOp.getOpcode()))
20153 return SDValue();
20154
20155 SDValue MulPassthruOp = MulOp.getOperand(2);
20156
20157 if (!MulPassthruOp.isUndef())
20158 return SDValue();
20159
20160 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
20161 const RISCVSubtarget &Subtarget) {
20162 if (N->getOpcode() == ISD::ADD) {
20163 SDLoc DL(N);
20164 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
20165 Subtarget);
20166 }
20167 return std::make_pair(N->getOperand(3), N->getOperand(4));
20168 }(N, DAG, Subtarget);
20169
20170 SDValue MulMask = MulOp.getOperand(3);
20171 SDValue MulVL = MulOp.getOperand(4);
20172
20173 if (AddMask != MulMask || AddVL != MulVL)
20174 return SDValue();
20175
20176 const auto &TSInfo =
20177 static_cast<const RISCVSelectionDAGInfo &>(DAG.getSelectionDAGInfo());
20178 unsigned Opc = TSInfo.getMAccOpcode(MulOp.getOpcode());
20179
20180 SDLoc DL(N);
20181 EVT VT = N->getValueType(0);
20182 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
20183 AddVL};
20184 return DAG.getNode(Opc, DL, VT, Ops);
20185}
20186
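// Fold (add (vqdot/vqdotu/vqdotsu x, y, accum), z) by adding z into the
// vqdot's accumulator operand, provided the add uses an all-ones mask with
// the same VL as the dot product.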
20187 static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG,
20188 const RISCVSubtarget &Subtarget) {
20189
20190 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
20191
20192 if (!N->getValueType(0).isVector())
20193 return SDValue();
20194
20195 SDValue Addend = N->getOperand(0);
20196 SDValue DotOp = N->getOperand(1);
20197
20198 if (N->getOpcode() == RISCVISD::ADD_VL) {
20199 SDValue AddPassthruOp = N->getOperand(2);
20200 if (!AddPassthruOp.isUndef())
20201 return SDValue();
20202 }
20203
20204 auto IsVqdotqOpc = [](unsigned Opc) {
20205 switch (Opc) {
20206 case RISCVISD::VQDOT_VL:
20207 case RISCVISD::VQDOTU_VL:
20208 case RISCVISD::VQDOTSU_VL:
20209 return true;
20210 default:
20211 return false;
20212 }
20213 };
20214
20215 if (!IsVqdotqOpc(DotOp.getOpcode()))
20216 std::swap(Addend, DotOp);
20217
20218 if (!IsVqdotqOpc(DotOp.getOpcode()))
20219 return SDValue();
20220
20221 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
20222 const RISCVSubtarget &Subtarget) {
20223 if (N->getOpcode() == ISD::ADD) {
20224 SDLoc DL(N);
20225 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
20226 Subtarget);
20227 }
20228 return std::make_pair(N->getOperand(3), N->getOperand(4));
20229 }(N, DAG, Subtarget);
20230
20231 SDValue MulVL = DotOp.getOperand(4);
20232 if (AddVL != MulVL)
20233 return SDValue();
20234
20235 if (AddMask.getOpcode() != RISCVISD::VMSET_VL ||
20236 AddMask.getOperand(0) != MulVL)
20237 return SDValue();
20238
20239 SDValue AccumOp = DotOp.getOperand(2);
20240 SDLoc DL(N);
20241 EVT VT = N->getValueType(0);
20242 Addend = DAG.getNode(RISCVISD::ADD_VL, DL, VT, Addend, AccumOp,
20243 DAG.getUNDEF(VT), AddMask, AddVL);
20244
20245 SDValue Ops[] = {DotOp.getOperand(0), DotOp.getOperand(1), Addend,
20246 DotOp.getOperand(3), DotOp->getOperand(4)};
20247 return DAG.getNode(DotOp->getOpcode(), DL, VT, Ops);
20248}
20249
20250static bool
20251 legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
20252 ISD::MemIndexType &IndexType,
20253 RISCVTargetLowering::DAGCombinerInfo &DCI) {
20254 if (!DCI.isBeforeLegalize())
20255 return false;
20256
20257 SelectionDAG &DAG = DCI.DAG;
20258 const MVT XLenVT =
20259 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
20260
20261 const EVT IndexVT = Index.getValueType();
20262
20263 // RISC-V indexed loads only support the "unsigned unscaled" addressing
20264 // mode, so anything else must be manually legalized.
20265 if (!isIndexTypeSigned(IndexType))
20266 return false;
20267
20268 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
20269 // Any index legalization should first promote to XLenVT, so we don't lose
20270 // bits when scaling. This may create an illegal index type so we let
20271 // LLVM's legalization take care of the splitting.
20272 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
20273 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
20274 EVT::getVectorVT(*DAG.getContext(), XLenVT,
20275 IndexVT.getVectorElementCount()),
20276 Index);
20277 }
20278 IndexType = ISD::UNSIGNED_SCALED;
20279 return true;
20280}
20281
20282/// Match the index vector of a scatter or gather node as the shuffle mask
20283/// which performs the rearrangement if possible. Will only match if
20284/// all lanes are touched, and thus replacing the scatter or gather with
20285/// a unit strided access and shuffle is legal.
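/// For example, a gather of i32 elements with byte offsets <4, 0, 12, 8>
/// and an all-ones mask touches every lane of a contiguous 4 x i32 region,
/// so it can be replaced by a unit-strided load plus a <1, 0, 3, 2> shuffle.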
20286static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
20287 SmallVector<int> &ShuffleMask) {
20288 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
20289 return false;
20290 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
20291 return false;
20292
20293 const unsigned ElementSize = VT.getScalarStoreSize();
20294 const unsigned NumElems = VT.getVectorNumElements();
20295
20296 // Create the shuffle mask and check all bits active
20297 assert(ShuffleMask.empty());
20298 BitVector ActiveLanes(NumElems);
20299 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
20300 // TODO: We've found an active bit of UB, and could be
20301 // more aggressive here if desired.
20302 if (Index->getOperand(i)->isUndef())
20303 return false;
20304 uint64_t C = Index->getConstantOperandVal(i);
20305 if (C % ElementSize != 0)
20306 return false;
20307 C = C / ElementSize;
20308 if (C >= NumElems)
20309 return false;
20310 ShuffleMask.push_back(C);
20311 ActiveLanes.set(C);
20312 }
20313 return ActiveLanes.all();
20314}
20315
20316/// Match the index of a gather or scatter operation as an operation
20317/// with twice the element width and half the number of elements. This is
20318/// generally profitable (if legal) because these operations are linear
20319 /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
20320/// come out ahead.
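/// For example, i32 elements with byte offsets <0, 4, 8, 12> pair up into
/// adjacent doublewords, so the access can instead use i64 elements with
/// byte offsets <0, 8>.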
20321static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
20322 Align BaseAlign, const RISCVSubtarget &ST) {
20323 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
20324 return false;
20325 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
20326 return false;
20327
20328 // Attempt a doubling. If we can use an element type 4x or 8x in
20329 // size, this will happen via multiple iterations of the transform.
20330 const unsigned NumElems = VT.getVectorNumElements();
20331 if (NumElems % 2 != 0)
20332 return false;
20333
20334 const unsigned ElementSize = VT.getScalarStoreSize();
20335 const unsigned WiderElementSize = ElementSize * 2;
20336 if (WiderElementSize > ST.getELen()/8)
20337 return false;
20338
20339 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
20340 return false;
20341
20342 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
20343 // TODO: We've found an active bit of UB, and could be
20344 // more aggressive here if desired.
20345 if (Index->getOperand(i)->isUndef())
20346 return false;
20347 // TODO: This offset check is too strict if we support fully
20348 // misaligned memory operations.
20349 uint64_t C = Index->getConstantOperandVal(i);
20350 if (i % 2 == 0) {
20351 if (C % WiderElementSize != 0)
20352 return false;
20353 continue;
20354 }
20355 uint64_t Last = Index->getConstantOperandVal(i-1);
20356 if (C != Last + ElementSize)
20357 return false;
20358 }
20359 return true;
20360}
20361
20362// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
20363 // This benefits the cases where X and Y are both the same low-precision
20364 // vector value type. Since the truncate would be lowered into
20365// n-levels TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate
20366// restriction, such pattern would be expanded into a series of "vsetvli"
20367// and "vnsrl" instructions later to reach this point.
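// For example, with v4i8 X and Y: once the shift amount reaches 7 the
// truncated result is just copies of X's sign bit, so Y can be clamped with
// smin(Y, 7) and the shift applied to X directly.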
20368 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
20369 SDValue Mask = N->getOperand(1);
20370 SDValue VL = N->getOperand(2);
20371
20372 bool IsVLMAX = isAllOnesConstant(VL) ||
20373 (isa<RegisterSDNode>(VL) &&
20374 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
20375 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
20376 Mask.getOperand(0) != VL)
20377 return SDValue();
20378
20379 auto IsTruncNode = [&](SDValue V) {
20380 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
20381 V.getOperand(1) == Mask && V.getOperand(2) == VL;
20382 };
20383
20384 SDValue Op = N->getOperand(0);
20385
20386 // We first need to find the innermost TRUNCATE_VECTOR_VL node in order
20387 // to recognize such a pattern.
20388 while (IsTruncNode(Op)) {
20389 if (!Op.hasOneUse())
20390 return SDValue();
20391 Op = Op.getOperand(0);
20392 }
20393
20394 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
20395 return SDValue();
20396
20397 SDValue N0 = Op.getOperand(0);
20398 SDValue N1 = Op.getOperand(1);
20399 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
20400 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
20401 return SDValue();
20402
20403 SDValue N00 = N0.getOperand(0);
20404 SDValue N10 = N1.getOperand(0);
20405 if (!N00.getValueType().isVector() ||
20406 N00.getValueType() != N10.getValueType() ||
20407 N->getValueType(0) != N10.getValueType())
20408 return SDValue();
20409
20410 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
20411 SDValue SMin =
20412 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
20413 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
20414 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
20415}
20416
20417// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
20418// maximum value for the truncated type.
20419// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
20420// is the signed maximum value for the truncated type and C2 is the signed
20421// minimum value.
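// For example, when truncating from i16 elements to i8, (umin X, 255)
// becomes a vnclipu and (smin (smax X, -128), 127) becomes a vnclip.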
20422 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
20423 const RISCVSubtarget &Subtarget) {
20424 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
20425
20426 MVT VT = N->getSimpleValueType(0);
20427
20428 SDValue Mask = N->getOperand(1);
20429 SDValue VL = N->getOperand(2);
20430
20431 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
20432 APInt &SplatVal) {
20433 if (V.getOpcode() != Opc &&
20434 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
20435 V.getOperand(3) == Mask && V.getOperand(4) == VL))
20436 return SDValue();
20437
20438 SDValue Op = V.getOperand(1);
20439
20440 // Peek through conversion between fixed and scalable vectors.
20441 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
20442 isNullConstant(Op.getOperand(2)) &&
20443 Op.getOperand(1).getValueType().isFixedLengthVector() &&
20444 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20445 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
20446 isNullConstant(Op.getOperand(1).getOperand(1)))
20447 Op = Op.getOperand(1).getOperand(0);
20448
20449 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
20450 return V.getOperand(0);
20451
20452 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
20453 Op.getOperand(2) == VL) {
20454 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
20455 SplatVal =
20456 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
20457 return V.getOperand(0);
20458 }
20459 }
20460
20461 return SDValue();
20462 };
20463
20464 SDLoc DL(N);
20465
20466 auto DetectUSatPattern = [&](SDValue V) {
20467 APInt LoC, HiC;
20468
20469 // Simple case, V is a UMIN.
20470 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
20471 if (HiC.isMask(VT.getScalarSizeInBits()))
20472 return UMinOp;
20473
20474 // If we have an SMAX that removes negative numbers first, then we can match
20475 // SMIN instead of UMIN.
20476 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20477 if (SDValue SMaxOp =
20478 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20479 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
20480 return SMinOp;
20481
20482 // If we have an SMIN before an SMAX and the SMAX constant is less than or
20483 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
20484 // first.
20485 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20486 if (SDValue SMinOp =
20487 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20488 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
20489 HiC.uge(LoC))
20490 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
20491 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
20492 Mask, VL);
20493
20494 return SDValue();
20495 };
20496
20497 auto DetectSSatPattern = [&](SDValue V) {
20498 unsigned NumDstBits = VT.getScalarSizeInBits();
20499 unsigned NumSrcBits = V.getScalarValueSizeInBits();
20500 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
20501 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
20502
20503 APInt HiC, LoC;
20504 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20505 if (SDValue SMaxOp =
20506 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20507 if (HiC == SignedMax && LoC == SignedMin)
20508 return SMaxOp;
20509
20510 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
20511 if (SDValue SMinOp =
20512 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
20513 if (HiC == SignedMax && LoC == SignedMin)
20514 return SMinOp;
20515
20516 return SDValue();
20517 };
20518
20519 SDValue Src = N->getOperand(0);
20520
20521 // Look through multiple layers of truncates.
20522 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
20523 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
20524 Src.hasOneUse())
20525 Src = Src.getOperand(0);
20526
20527 SDValue Val;
20528 unsigned ClipOpc;
20529 if ((Val = DetectUSatPattern(Src)))
20530 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
20531 else if ((Val = DetectSSatPattern(Src)))
20532 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
20533 else
20534 return SDValue();
20535
20536 MVT ValVT = Val.getSimpleValueType();
20537
20538 do {
20539 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
20540 ValVT = ValVT.changeVectorElementType(ValEltVT);
20541 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
20542 } while (ValVT != VT);
20543
20544 return Val;
20545}
20546
20547// Convert
20548// (iX ctpop (bitcast (vXi1 A)))
20549// ->
20550// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
20551// and
20552// (iN reduce.add (zext (vXi1 A to vXiN))
20553// ->
20554// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
20555// FIXME: It's complicated to match all the variations of this after type
20556// legalization so we only handle the pre-type legalization pattern, but that
20557// requires the fixed vector type to be legal.
20558 static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
20559 const RISCVSubtarget &Subtarget) {
20560 unsigned Opc = N->getOpcode();
20562 "Unexpected opcode");
20563 EVT VT = N->getValueType(0);
20564 if (!VT.isScalarInteger())
20565 return SDValue();
20566
20567 SDValue Src = N->getOperand(0);
20568
20569 if (Opc == ISD::CTPOP) {
20570 // Peek through zero_extend. It doesn't change the count.
20571 if (Src.getOpcode() == ISD::ZERO_EXTEND)
20572 Src = Src.getOperand(0);
20573
20574 if (Src.getOpcode() != ISD::BITCAST)
20575 return SDValue();
20576 Src = Src.getOperand(0);
20577 } else if (Opc == ISD::VECREDUCE_ADD) {
20578 if (Src.getOpcode() != ISD::ZERO_EXTEND)
20579 return SDValue();
20580 Src = Src.getOperand(0);
20581 }
20582
20583 EVT SrcEVT = Src.getValueType();
20584 if (!SrcEVT.isSimple())
20585 return SDValue();
20586
20587 MVT SrcMVT = SrcEVT.getSimpleVT();
20588 // Make sure the input is an i1 vector.
20589 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
20590 return SDValue();
20591
20592 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20593 if (!TLI.isTypeLegal(SrcMVT))
20594 return SDValue();
20595
20596 // Check that destination type is large enough to hold result without
20597 // overflow.
20598 if (Opc == ISD::VECREDUCE_ADD) {
20599 unsigned EltSize = SrcMVT.getScalarSizeInBits();
20600 unsigned MinSize = SrcMVT.getSizeInBits().getKnownMinValue();
20601 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
20602 unsigned MaxVLMAX = SrcMVT.isFixedLengthVector()
20603 ? SrcMVT.getVectorNumElements()
20604 : RISCVTargetLowering::computeVLMAX(
20605 VectorBitsMax, EltSize, MinSize);
20606 if (VT.getFixedSizeInBits() < Log2_32(MaxVLMAX) + 1)
20607 return SDValue();
20608 }
20609
20610 MVT ContainerVT = SrcMVT;
20611 if (SrcMVT.isFixedLengthVector()) {
20612 ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
20613 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
20614 }
20615
20616 SDLoc DL(N);
20617 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
20618
20619 MVT XLenVT = Subtarget.getXLenVT();
20620 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
20621 return DAG.getZExtOrTrunc(Pop, DL, VT);
20622}
20623
20624 static SDValue performSHLCombine(SDNode *N,
20625 TargetLowering::DAGCombinerInfo &DCI,
20626 const RISCVSubtarget &Subtarget) {
20627 // (shl (zext x), y) -> (vwsll x, y)
20628 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20629 return V;
20630
20631 // (shl (sext x), C) -> (vwmulsu x, 1u << C)
20632 // (shl (zext x), C) -> (vwmulu x, 1u << C)
20633
20634 if (!DCI.isAfterLegalizeDAG())
20635 return SDValue();
20636
20637 SDValue LHS = N->getOperand(0);
20638 if (!LHS.hasOneUse())
20639 return SDValue();
20640 unsigned Opcode;
20641 switch (LHS.getOpcode()) {
20642 case ISD::SIGN_EXTEND:
20643 case RISCVISD::VSEXT_VL:
20644 Opcode = RISCVISD::VWMULSU_VL;
20645 break;
20646 case ISD::ZERO_EXTEND:
20647 case RISCVISD::VZEXT_VL:
20648 Opcode = RISCVISD::VWMULU_VL;
20649 break;
20650 default:
20651 return SDValue();
20652 }
20653
20654 SDValue RHS = N->getOperand(1);
20655 APInt ShAmt;
20656 uint64_t ShAmtInt;
20657 if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
20658 ShAmtInt = ShAmt.getZExtValue();
20659 else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
20660 RHS.getOperand(1).getOpcode() == ISD::Constant)
20661 ShAmtInt = RHS.getConstantOperandVal(1);
20662 else
20663 return SDValue();
20664
20665 // Better foldings:
20666 // (shl (sext x), 1) -> (vwadd x, x)
20667 // (shl (zext x), 1) -> (vwaddu x, x)
20668 if (ShAmtInt <= 1)
20669 return SDValue();
20670
20671 SDValue NarrowOp = LHS.getOperand(0);
20672 MVT NarrowVT = NarrowOp.getSimpleValueType();
20673 uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
20674 if (ShAmtInt >= NarrowBits)
20675 return SDValue();
20676 MVT VT = N->getSimpleValueType(0);
20677 if (NarrowBits * 2 != VT.getScalarSizeInBits())
20678 return SDValue();
20679
20680 SelectionDAG &DAG = DCI.DAG;
20681 SDLoc DL(N);
20682 SDValue Passthru, Mask, VL;
20683 switch (N->getOpcode()) {
20684 case ISD::SHL:
20685 Passthru = DAG.getUNDEF(VT);
20686 std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
20687 break;
20688 case RISCVISD::SHL_VL:
20689 Passthru = N->getOperand(2);
20690 Mask = N->getOperand(3);
20691 VL = N->getOperand(4);
20692 break;
20693 default:
20694 llvm_unreachable("Expected SHL");
20695 }
20696 return DAG.getNode(Opcode, DL, VT, NarrowOp,
20697 DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
20698 Passthru, Mask, VL);
20699}
20700
20701 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
20702 DAGCombinerInfo &DCI) const {
20703 SelectionDAG &DAG = DCI.DAG;
20704 const MVT XLenVT = Subtarget.getXLenVT();
20705 SDLoc DL(N);
20706
20707 // Helper to call SimplifyDemandedBits on an operand of N where only some low
20708 // bits are demanded. N will be added to the Worklist if it was not deleted.
20709 // Caller should return SDValue(N, 0) if this returns true.
20710 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
20711 SDValue Op = N->getOperand(OpNo);
20712 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
20713 if (!SimplifyDemandedBits(Op, Mask, DCI))
20714 return false;
20715
20716 if (N->getOpcode() != ISD::DELETED_NODE)
20717 DCI.AddToWorklist(N);
20718 return true;
20719 };
20720
20721 switch (N->getOpcode()) {
20722 default:
20723 break;
20724 case RISCVISD::SplitF64: {
20725 SDValue Op0 = N->getOperand(0);
20726 // If the input to SplitF64 is just BuildPairF64 then the operation is
20727 // redundant. Instead, use BuildPairF64's operands directly.
20728 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
20729 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
20730
20731 if (Op0->isUndef()) {
20732 SDValue Lo = DAG.getUNDEF(MVT::i32);
20733 SDValue Hi = DAG.getUNDEF(MVT::i32);
20734 return DCI.CombineTo(N, Lo, Hi);
20735 }
20736
20737 // It's cheaper to materialise two 32-bit integers than to load a double
20738 // from the constant pool and transfer it to integer registers through the
20739 // stack.
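// For example, SplitF64 of the constant 1.0 becomes the 32-bit pair
// (0x00000000, 0x3ff00000).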
20740 if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {
20741 APInt V = C->getValueAPF().bitcastToAPInt();
20742 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
20743 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
20744 return DCI.CombineTo(N, Lo, Hi);
20745 }
20746
20747 // This is a target-specific version of a DAGCombine performed in
20748 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20749 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20750 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20751 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20752 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
20753 break;
20754 SDValue NewSplitF64 =
20755 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
20756 Op0.getOperand(0));
20757 SDValue Lo = NewSplitF64.getValue(0);
20758 SDValue Hi = NewSplitF64.getValue(1);
20759 APInt SignBit = APInt::getSignMask(32);
20760 if (Op0.getOpcode() == ISD::FNEG) {
20761 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
20762 DAG.getConstant(SignBit, DL, MVT::i32));
20763 return DCI.CombineTo(N, Lo, NewHi);
20764 }
20765 assert(Op0.getOpcode() == ISD::FABS);
20766 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
20767 DAG.getConstant(~SignBit, DL, MVT::i32));
20768 return DCI.CombineTo(N, Lo, NewHi);
20769 }
20770 case RISCVISD::SLLW:
20771 case RISCVISD::SRAW:
20772 case RISCVISD::SRLW:
20773 case RISCVISD::RORW:
20774 case RISCVISD::ROLW: {
20775 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
20776 if (SimplifyDemandedLowBitsHelper(0, 32) ||
20777 SimplifyDemandedLowBitsHelper(1, 5))
20778 return SDValue(N, 0);
20779
20780 break;
20781 }
20782 case RISCVISD::ABSW:
20783 case RISCVISD::CLZW:
20784 case RISCVISD::CTZW: {
20785 // Only the lower 32 bits of the first operand are read
20786 if (SimplifyDemandedLowBitsHelper(0, 32))
20787 return SDValue(N, 0);
20788 break;
20789 }
20790 case RISCVISD::FMV_W_X_RV64: {
20791 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
20792 // conversion is unnecessary and can be replaced with the
20793 // FMV_X_ANYEXTW_RV64 operand.
20794 SDValue Op0 = N->getOperand(0);
20795 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
20796 return Op0.getOperand(0);
20797 break;
20798 }
20799 case RISCVISD::FMV_X_ANYEXTH:
20800 case RISCVISD::FMV_X_ANYEXTW_RV64: {
20801 SDLoc DL(N);
20802 SDValue Op0 = N->getOperand(0);
20803 MVT VT = N->getSimpleValueType(0);
20804
20805 // Constant fold.
20806 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
20807 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
20808 return DAG.getConstant(Val, DL, VT);
20809 }
20810
20811 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
20812 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
20813 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
20814 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
20815 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
20816 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
20817 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
20818 assert(Op0.getOperand(0).getValueType() == VT &&
20819 "Unexpected value type!");
20820 return Op0.getOperand(0);
20821 }
20822
20823 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
20824 cast<LoadSDNode>(Op0)->isSimple()) {
20826 auto *LN0 = cast<LoadSDNode>(Op0);
20827 SDValue Load =
20828 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
20829 LN0->getBasePtr(), IVT, LN0->getMemOperand());
20830 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
20831 return Load;
20832 }
20833
20834 // This is a target-specific version of a DAGCombine performed in
20835 // DAGCombiner::visitBITCAST. It performs the equivalent of:
20836 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
20837 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
20838 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
20839 !Op0.getNode()->hasOneUse())
20840 break;
20841 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
20842 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
20843 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
20844 if (Op0.getOpcode() == ISD::FNEG)
20845 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
20846 DAG.getConstant(SignBit, DL, VT));
20847
20848 assert(Op0.getOpcode() == ISD::FABS);
20849 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
20850 DAG.getConstant(~SignBit, DL, VT));
20851 }
20852 case ISD::ABS: {
20853 EVT VT = N->getValueType(0);
20854 SDValue N0 = N->getOperand(0);
20855 // abs (sext) -> zext (abs)
20856 // abs (zext) -> zext (handled elsewhere)
20857 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
20858 SDValue Src = N0.getOperand(0);
20859 SDLoc DL(N);
20860 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
20861 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
20862 }
20863 break;
20864 }
20865 case ISD::ADD: {
20866 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20867 return V;
20868 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
20869 return V;
20870 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
20871 return V;
20872 return performADDCombine(N, DCI, Subtarget);
20873 }
20874 case ISD::SUB: {
20875 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20876 return V;
20877 return performSUBCombine(N, DAG, Subtarget);
20878 }
20879 case ISD::AND:
20880 return performANDCombine(N, DCI, Subtarget);
20881 case ISD::OR: {
20882 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20883 return V;
20884 return performORCombine(N, DCI, Subtarget);
20885 }
20886 case ISD::XOR:
20887 return performXORCombine(N, DAG, Subtarget);
20888 case ISD::MUL:
20889 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
20890 return V;
20891 return performMULCombine(N, DAG, DCI, Subtarget);
20892 case ISD::SDIV:
20893 case ISD::UDIV:
20894 case ISD::SREM:
20895 case ISD::UREM:
20896 if (SDValue V = combineBinOpOfZExt(N, DAG))
20897 return V;
20898 break;
20899 case ISD::FMUL: {
20900 using namespace SDPatternMatch;
20901 SDLoc DL(N);
20902 EVT VT = N->getValueType(0);
20903 SDValue X, Y;
20904 // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
20905 // hoistFNegAboveFMulFDiv.
20906 // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
20908 return DAG.getNode(ISD::FNEG, DL, VT,
20909 DAG.getNode(ISD::FMUL, DL, VT, X, Y, N->getFlags()),
20910 N->getFlags());
20911
20912 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
20913 SDValue N0 = N->getOperand(0);
20914 SDValue N1 = N->getOperand(1);
20915 if (N0->getOpcode() != ISD::FCOPYSIGN)
20916 std::swap(N0, N1);
20917 if (N0->getOpcode() != ISD::FCOPYSIGN)
20918 return SDValue();
20919 auto *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
20920 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
20921 return SDValue();
20922 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
20923 return SDValue();
20924 SDValue Sign = N0->getOperand(1);
20925 if (Sign.getValueType() != VT)
20926 return SDValue();
20927 return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
20928 }
20929 case ISD::FADD:
20930 case ISD::UMAX:
20931 case ISD::UMIN:
20932 case ISD::SMAX:
20933 case ISD::SMIN:
20934 case ISD::FMAXNUM:
20935 case ISD::FMINNUM: {
20936 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
20937 return V;
20938 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
20939 return V;
20940 return SDValue();
20941 }
20942 case ISD::SETCC:
20943 return performSETCCCombine(N, DCI, Subtarget);
20944 case ISD::SIGN_EXTEND_INREG:
20945 return performSIGN_EXTEND_INREGCombine(N, DCI, Subtarget);
20946 case ISD::ZERO_EXTEND:
20947 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
20948 // type legalization. This is safe because fp_to_uint produces poison if
20949 // it overflows.
20950 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
20951 SDValue Src = N->getOperand(0);
20952 if (Src.getOpcode() == ISD::FP_TO_UINT &&
20953 isTypeLegal(Src.getOperand(0).getValueType()))
20954 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
20955 Src.getOperand(0));
20956 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
20957 isTypeLegal(Src.getOperand(1).getValueType())) {
20958 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
20959 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
20960 Src.getOperand(0), Src.getOperand(1));
20961 DCI.CombineTo(N, Res);
20962 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
20963 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
20964 return SDValue(N, 0); // Return N so it doesn't get rechecked.
20965 }
20966 }
20967 return SDValue();
20968 case RISCVISD::TRUNCATE_VECTOR_VL:
20969 if (SDValue V = combineTruncOfSraSext(N, DAG))
20970 return V;
20971 return combineTruncToVnclip(N, DAG, Subtarget);
20972 case ISD::VP_TRUNCATE:
20973 return performVP_TRUNCATECombine(N, DAG, Subtarget);
20974 case ISD::TRUNCATE:
20975 return performTRUNCATECombine(N, DAG, Subtarget);
20976 case ISD::SELECT:
20977 return performSELECTCombine(N, DAG, Subtarget);
20978 case ISD::VSELECT:
20979 return performVSELECTCombine(N, DAG);
20980 case RISCVISD::CZERO_EQZ:
20981 case RISCVISD::CZERO_NEZ: {
20982 SDValue Val = N->getOperand(0);
20983 SDValue Cond = N->getOperand(1);
20984
20985 unsigned Opc = N->getOpcode();
20986
20987 // czero_eqz x, x -> x
20988 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
20989 return Val;
20990
20991 unsigned InvOpc =
20992 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
20993
20994 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
20995 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
20996 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
20997 SDValue NewCond = Cond.getOperand(0);
20998 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
20999 if (DAG.MaskedValueIsZero(NewCond, Mask))
21000 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
21001 }
21002 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
21003 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
21004 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
21005 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
21006 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
21007 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
21008 if (ISD::isIntEqualitySetCC(CCVal))
21009 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
21010 N->getValueType(0), Val, Cond.getOperand(0));
21011 }
21012 return SDValue();
21013 }
21014 case RISCVISD::SELECT_CC: {
21015 // Transform
21016 SDValue LHS = N->getOperand(0);
21017 SDValue RHS = N->getOperand(1);
21018 SDValue CC = N->getOperand(2);
21019 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
21020 SDValue TrueV = N->getOperand(3);
21021 SDValue FalseV = N->getOperand(4);
21022 SDLoc DL(N);
21023 EVT VT = N->getValueType(0);
21024
21025 // If the True and False values are the same, we don't need a select_cc.
21026 if (TrueV == FalseV)
21027 return TrueV;
21028
21029 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
21030 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
21031 if (!Subtarget.hasShortForwardBranchIALU() && isa<ConstantSDNode>(TrueV) &&
21032 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
21033 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
21034 if (CCVal == ISD::CondCode::SETGE)
21035 std::swap(TrueV, FalseV);
21036
21037 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
21038 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
21039 // Only handle simm12; constants outside this range would have to be
21040 // materialized in a register anyway.
21041 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
21042 isInt<12>(TrueSImm - FalseSImm)) {
21043 SDValue SRA =
21044 DAG.getNode(ISD::SRA, DL, VT, LHS,
21045 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
21046 SDValue AND =
21047 DAG.getNode(ISD::AND, DL, VT, SRA,
21048 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
21049 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
21050 }
21051
21052 if (CCVal == ISD::CondCode::SETGE)
21053 std::swap(TrueV, FalseV);
21054 }
21055
21056 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
21057 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
21058 {LHS, RHS, CC, TrueV, FalseV});
21059
21060 if (!Subtarget.hasConditionalMoveFusion()) {
21061 // (select c, -1, y) -> -c | y
21062 if (isAllOnesConstant(TrueV)) {
21063 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
21064 SDValue Neg = DAG.getNegative(C, DL, VT);
21065 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
21066 }
21067 // (select c, y, -1) -> -!c | y
21068 if (isAllOnesConstant(FalseV)) {
21069 SDValue C =
21070 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
21071 SDValue Neg = DAG.getNegative(C, DL, VT);
21072 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
21073 }
21074
21075 // (select c, 0, y) -> -!c & y
21076 if (isNullConstant(TrueV)) {
21077 SDValue C =
21078 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
21079 SDValue Neg = DAG.getNegative(C, DL, VT);
21080 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
21081 }
21082 // (select c, y, 0) -> -c & y
21083 if (isNullConstant(FalseV)) {
21084 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
21085 SDValue Neg = DAG.getNegative(C, DL, VT);
21086 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
21087 }
21088 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
21089 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
21090 if (((isOneConstant(FalseV) && LHS == TrueV &&
21091 CCVal == ISD::CondCode::SETNE) ||
21092 (isOneConstant(TrueV) && LHS == FalseV &&
21093 CCVal == ISD::CondCode::SETEQ)) &&
21094 isNullConstant(RHS)) {
21095 // freeze it to be safe.
21096 LHS = DAG.getFreeze(LHS);
21097 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
21098 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
21099 }
21100 }
21101
21102 // If both true/false are an xor with 1, pull through the select.
21103 // This can occur after op legalization if both operands are setccs that
21104 // require an xor to invert.
21105 // FIXME: Generalize to other binary ops with identical operand?
21106 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
21107 TrueV.getOperand(1) == FalseV.getOperand(1) &&
21108 isOneConstant(TrueV.getOperand(1)) &&
21109 TrueV.hasOneUse() && FalseV.hasOneUse()) {
21110 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
21111 TrueV.getOperand(0), FalseV.getOperand(0));
21112 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
21113 }
21114
21115 return SDValue();
21116 }
21117 case RISCVISD::BR_CC: {
21118 SDValue LHS = N->getOperand(1);
21119 SDValue RHS = N->getOperand(2);
21120 SDValue CC = N->getOperand(3);
21121 SDLoc DL(N);
21122
21123 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
21124 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
21125 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
21126
21127 return SDValue();
21128 }
21129 case ISD::BITREVERSE:
21130 return performBITREVERSECombine(N, DAG, Subtarget);
21131 case ISD::FP_TO_SINT:
21132 case ISD::FP_TO_UINT:
21133 return performFP_TO_INTCombine(N, DCI, Subtarget);
21134 case ISD::FP_TO_SINT_SAT:
21135 case ISD::FP_TO_UINT_SAT:
21136 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
21137 case ISD::FCOPYSIGN: {
21138 EVT VT = N->getValueType(0);
21139 if (!VT.isVector())
21140 break;
21141 // There is a form of VFSGNJ which injects the negated sign of its second
21142 // operand. Try and bubble any FNEG up after the extend/round to produce
21143 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
21144 // TRUNC=1.
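// For example, (fcopysign X, (fp_extend (fneg Y))) is rewritten to
// (fcopysign X, (fneg (fp_extend Y))), which can then select the
// sign-negating form of VFSGNJ.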
21145 SDValue In2 = N->getOperand(1);
21146 // Avoid cases where the extend/round has multiple uses, as duplicating
21147 // those is typically more expensive than removing a fneg.
21148 if (!In2.hasOneUse())
21149 break;
21150 if (In2.getOpcode() != ISD::FP_EXTEND &&
21151 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
21152 break;
21153 In2 = In2.getOperand(0);
21154 if (In2.getOpcode() != ISD::FNEG)
21155 break;
21156 SDLoc DL(N);
21157 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
21158 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
21159 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
21160 }
21161 case ISD::MGATHER: {
21162 const auto *MGN = cast<MaskedGatherSDNode>(N);
21163 const EVT VT = N->getValueType(0);
21164 SDValue Index = MGN->getIndex();
21165 SDValue ScaleOp = MGN->getScale();
21166 ISD::MemIndexType IndexType = MGN->getIndexType();
21167 assert(!MGN->isIndexScaled() &&
21168 "Scaled gather/scatter should not be formed");
21169
21170 SDLoc DL(N);
21171 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
21172 return DAG.getMaskedGather(
21173 N->getVTList(), MGN->getMemoryVT(), DL,
21174 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
21175 MGN->getBasePtr(), Index, ScaleOp},
21176 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
21177
21178 if (narrowIndex(Index, IndexType, DAG))
21179 return DAG.getMaskedGather(
21180 N->getVTList(), MGN->getMemoryVT(), DL,
21181 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
21182 MGN->getBasePtr(), Index, ScaleOp},
21183 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
21184
21185 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
21186 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
21187 // The sequence will be XLenVT, not the type of Index. Tell
21188 // isSimpleVIDSequence this so we avoid overflow.
21189 if (std::optional<VIDSequence> SimpleVID =
21190 isSimpleVIDSequence(Index, Subtarget.getXLen());
21191 SimpleVID && SimpleVID->StepDenominator == 1) {
21192 const int64_t StepNumerator = SimpleVID->StepNumerator;
21193 const int64_t Addend = SimpleVID->Addend;
21194
21195 // Note: We don't need to check alignment here since (by assumption
21196 // from the existence of the gather), our offsets must be sufficiently
21197 // aligned.
21198
21199 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
21200 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
21201 assert(IndexType == ISD::UNSIGNED_SCALED);
21202 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
21203 DAG.getSignedConstant(Addend, DL, PtrVT));
21204
21205 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
21207 SDValue StridedLoad = DAG.getStridedLoadVP(
21208 VT, DL, MGN->getChain(), BasePtr,
21209 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
21210 EVL, MGN->getMemOperand());
21211 SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
21212 MGN->getPassThru());
21213 return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
21214 DL);
21215 }
21216 }
21217
21218 SmallVector<int> ShuffleMask;
21219 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
21220 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
21221 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
21222 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
21223 MGN->getMask(), DAG.getUNDEF(VT),
21224 MGN->getMemoryVT(), MGN->getMemOperand(),
21225 ISD::UNINDEXED, ISD::NON_EXTLOAD);
21226 SDValue Shuffle =
21227 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
21228 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
21229 }
21230
21231 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
21232 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
21233 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
21234 SmallVector<SDValue> NewIndices;
21235 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
21236 NewIndices.push_back(Index.getOperand(i));
21237 EVT IndexVT = Index.getValueType()
21238 .getHalfNumVectorElementsVT(*DAG.getContext());
21239 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
21240
21241 unsigned ElementSize = VT.getScalarStoreSize();
21242 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
21243 auto EltCnt = VT.getVectorElementCount();
21244 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
21245 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
21246 EltCnt.divideCoefficientBy(2));
21247 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
21248 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
21249 EltCnt.divideCoefficientBy(2));
21250 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
21251
21252 SDValue Gather =
21253 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
21254 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
21255 Index, ScaleOp},
21256 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
21257 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
21258 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
21259 }
21260 break;
21261 }
21262 case ISD::MSCATTER: {
21263 const auto *MSN = cast<MaskedScatterSDNode>(N);
21264 SDValue Index = MSN->getIndex();
21265 SDValue ScaleOp = MSN->getScale();
21266 ISD::MemIndexType IndexType = MSN->getIndexType();
21267 assert(!MSN->isIndexScaled() &&
21268 "Scaled gather/scatter should not be formed");
21269
21270 SDLoc DL(N);
21271 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
21272 return DAG.getMaskedScatter(
21273 N->getVTList(), MSN->getMemoryVT(), DL,
21274 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
21275 Index, ScaleOp},
21276 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
21277
21278 if (narrowIndex(Index, IndexType, DAG))
21279 return DAG.getMaskedScatter(
21280 N->getVTList(), MSN->getMemoryVT(), DL,
21281 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
21282 Index, ScaleOp},
21283 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
21284
21285 EVT VT = MSN->getValue()->getValueType(0);
21286 SmallVector<int> ShuffleMask;
21287 if (!MSN->isTruncatingStore() &&
21288 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
21289 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
21290 DAG.getUNDEF(VT), ShuffleMask);
21291 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
21292 DAG.getUNDEF(XLenVT), MSN->getMask(),
21293 MSN->getMemoryVT(), MSN->getMemOperand(),
21294 ISD::UNINDEXED, false);
21295 }
21296 break;
21297 }
21298 case ISD::VP_GATHER: {
21299 const auto *VPGN = cast<VPGatherSDNode>(N);
21300 SDValue Index = VPGN->getIndex();
21301 SDValue ScaleOp = VPGN->getScale();
21302 ISD::MemIndexType IndexType = VPGN->getIndexType();
21303 assert(!VPGN->isIndexScaled() &&
21304 "Scaled gather/scatter should not be formed");
21305
21306 SDLoc DL(N);
21307 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
21308 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
21309 {VPGN->getChain(), VPGN->getBasePtr(), Index,
21310 ScaleOp, VPGN->getMask(),
21311 VPGN->getVectorLength()},
21312 VPGN->getMemOperand(), IndexType);
21313
21314 if (narrowIndex(Index, IndexType, DAG))
21315 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
21316 {VPGN->getChain(), VPGN->getBasePtr(), Index,
21317 ScaleOp, VPGN->getMask(),
21318 VPGN->getVectorLength()},
21319 VPGN->getMemOperand(), IndexType);
21320
21321 break;
21322 }
21323 case ISD::VP_SCATTER: {
21324 const auto *VPSN = cast<VPScatterSDNode>(N);
21325 SDValue Index = VPSN->getIndex();
21326 SDValue ScaleOp = VPSN->getScale();
21327 ISD::MemIndexType IndexType = VPSN->getIndexType();
21328 assert(!VPSN->isIndexScaled() &&
21329 "Scaled gather/scatter should not be formed");
21330
21331 SDLoc DL(N);
21332 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
21333 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
21334 {VPSN->getChain(), VPSN->getValue(),
21335 VPSN->getBasePtr(), Index, ScaleOp,
21336 VPSN->getMask(), VPSN->getVectorLength()},
21337 VPSN->getMemOperand(), IndexType);
21338
21339 if (narrowIndex(Index, IndexType, DAG))
21340 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
21341 {VPSN->getChain(), VPSN->getValue(),
21342 VPSN->getBasePtr(), Index, ScaleOp,
21343 VPSN->getMask(), VPSN->getVectorLength()},
21344 VPSN->getMemOperand(), IndexType);
21345 break;
21346 }
21347 case RISCVISD::SHL_VL:
21348 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
21349 return V;
21350 [[fallthrough]];
21351 case RISCVISD::SRA_VL:
21352 case RISCVISD::SRL_VL: {
21353 SDValue ShAmt = N->getOperand(1);
21354 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
21355 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
21356 SDLoc DL(N);
21357 SDValue VL = N->getOperand(4);
21358 EVT VT = N->getValueType(0);
21359 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
21360 ShAmt.getOperand(1), VL);
21361 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
21362 N->getOperand(2), N->getOperand(3), N->getOperand(4));
21363 }
21364 break;
21365 }
21366 case ISD::SRA:
21367 if (SDValue V = performSRACombine(N, DAG, Subtarget))
21368 return V;
21369 [[fallthrough]];
21370 case ISD::SRL:
21371 case ISD::SHL: {
21372 if (N->getOpcode() == ISD::SHL) {
21373 if (SDValue V = performSHLCombine(N, DCI, Subtarget))
21374 return V;
21375 }
21376 SDValue ShAmt = N->getOperand(1);
21377 if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
21378 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
21379 SDLoc DL(N);
21380 EVT VT = N->getValueType(0);
21381 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
21382 ShAmt.getOperand(1),
21383 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
21384 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
21385 }
21386 break;
21387 }
21388 case RISCVISD::ADD_VL:
21389 if (SDValue V = simplifyOp_VL(N))
21390 return V;
21391 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
21392 return V;
21393 if (SDValue V = combineVqdotAccum(N, DAG, Subtarget))
21394 return V;
21395 return combineToVWMACC(N, DAG, Subtarget);
21396 case RISCVISD::VWADD_W_VL:
21397 case RISCVISD::VWADDU_W_VL:
21398 case RISCVISD::VWSUB_W_VL:
21399 case RISCVISD::VWSUBU_W_VL:
21400 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
21401 case RISCVISD::OR_VL:
21402 case RISCVISD::SUB_VL:
21403 case RISCVISD::MUL_VL:
21404 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
21405 case RISCVISD::VFMADD_VL:
21406 case RISCVISD::VFNMADD_VL:
21407 case RISCVISD::VFMSUB_VL:
21408 case RISCVISD::VFNMSUB_VL:
21409 case RISCVISD::STRICT_VFMADD_VL:
21410 case RISCVISD::STRICT_VFNMADD_VL:
21411 case RISCVISD::STRICT_VFMSUB_VL:
21412 case RISCVISD::STRICT_VFNMSUB_VL:
21413 return performVFMADD_VLCombine(N, DCI, Subtarget);
21414 case RISCVISD::FADD_VL:
21415 case RISCVISD::FSUB_VL:
21416 case RISCVISD::FMUL_VL:
21417 case RISCVISD::VFWADD_W_VL:
21418 case RISCVISD::VFWSUB_W_VL:
21419 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
21420 case ISD::LOAD:
21421 case ISD::STORE: {
21422 if (DCI.isAfterLegalizeDAG())
21423 if (SDValue V = performMemPairCombine(N, DCI))
21424 return V;
21425
21426 if (N->getOpcode() != ISD::STORE)
21427 break;
21428
21429 auto *Store = cast<StoreSDNode>(N);
21430 SDValue Chain = Store->getChain();
21431 EVT MemVT = Store->getMemoryVT();
21432 SDValue Val = Store->getValue();
21433 SDLoc DL(N);
21434
21435 bool IsScalarizable =
21436 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
21437 Store->isSimple() &&
21438 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
21439 isPowerOf2_64(MemVT.getSizeInBits()) &&
21440 MemVT.getSizeInBits() <= Subtarget.getXLen();
21441
21442 // If sufficiently aligned we can scalarize stores of constant vectors of
21443 // any power-of-two size up to XLen bits, provided that they aren't too
21444 // expensive to materialize.
21445 // vsetivli zero, 2, e8, m1, ta, ma
21446 // vmv.v.i v8, 4
21447 // vse64.v v8, (a0)
21448 // ->
21449 // li a1, 1028
21450 // sh a1, 0(a0)
21451 if (DCI.isBeforeLegalize() && IsScalarizable &&
21452 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
21453 // Get the constant vector bits
21454 APInt NewC(Val.getValueSizeInBits(), 0);
21455 uint64_t EltSize = Val.getScalarValueSizeInBits();
21456 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
21457 if (Val.getOperand(i).isUndef())
21458 continue;
21459 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
21460 i * EltSize);
21461 }
21462 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
21463
21464 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
21465 true) <= 2 &&
21466 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
21467 NewVT, *Store->getMemOperand())) {
21468 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
21469 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
21470 Store->getPointerInfo(), Store->getBaseAlign(),
21471 Store->getMemOperand()->getFlags());
21472 }
21473 }
21474
21475 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
21476 // vsetivli zero, 2, e16, m1, ta, ma
21477 // vle16.v v8, (a0)
21478 // vse16.v v8, (a1)
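// -> (for this 2 x e16 example)
// lw a2, 0(a0)
// sw a2, 0(a1)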
21479 if (auto *L = dyn_cast<LoadSDNode>(Val);
21480 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
21481 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
21482 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
21483 L->getMemoryVT() == MemVT) {
21484 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
21485 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
21486 NewVT, *Store->getMemOperand()) &&
21487 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
21488 NewVT, *L->getMemOperand())) {
21489 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
21490 L->getPointerInfo(), L->getBaseAlign(),
21491 L->getMemOperand()->getFlags());
21492 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
21493 Store->getPointerInfo(), Store->getBaseAlign(),
21494 Store->getMemOperand()->getFlags());
21495 }
21496 }
21497
21498 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
21499 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
21500 // any illegal types.
21501 if ((Val.getOpcode() == RISCVISD::VMV_X_S ||
21502 (DCI.isAfterLegalizeDAG() &&
21503 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21504 isNullConstant(Val.getOperand(1)))) &&
21505 Val.hasOneUse()) {
21506 SDValue Src = Val.getOperand(0);
21507 MVT VecVT = Src.getSimpleValueType();
21508 // VecVT should be scalable and memory VT should match the element type.
21509 if (!Store->isIndexed() && VecVT.isScalableVector() &&
21510 MemVT == VecVT.getVectorElementType()) {
21511 SDLoc DL(N);
21512 MVT MaskVT = getMaskTypeFor(VecVT);
21513 return DAG.getStoreVP(
21514 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
21515 DAG.getConstant(1, DL, MaskVT),
21516 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
21517 Store->getMemOperand(), Store->getAddressingMode(),
21518 Store->isTruncatingStore(), /*IsCompress*/ false);
21519 }
21520 }
21521
21522 break;
21523 }
21524 case ISD::SPLAT_VECTOR: {
21525 EVT VT = N->getValueType(0);
21526 // Only perform this combine on legal MVT types.
21527 if (!isTypeLegal(VT))
21528 break;
21529 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
21530 DAG, Subtarget))
21531 return Gather;
21532 break;
21533 }
21534 case ISD::BUILD_VECTOR:
21535 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
21536 return V;
21537 break;
21538 case ISD::CONCAT_VECTORS:
21539 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
21540 return V;
21541 break;
21542 case ISD::VECTOR_SHUFFLE:
21543 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
21544 return V;
21545 break;
21546 case ISD::INSERT_VECTOR_ELT:
21547 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
21548 return V;
21549 break;
21550 case RISCVISD::VFMV_V_F_VL: {
21551 const MVT VT = N->getSimpleValueType(0);
21552 SDValue Passthru = N->getOperand(0);
21553 SDValue Scalar = N->getOperand(1);
21554 SDValue VL = N->getOperand(2);
21555
21556 // If VL is 1, we can use vfmv.s.f.
21557 if (isOneConstant(VL))
21558 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
21559 break;
21560 }
21561 case RISCVISD::VMV_V_X_VL: {
21562 const MVT VT = N->getSimpleValueType(0);
21563 SDValue Passthru = N->getOperand(0);
21564 SDValue Scalar = N->getOperand(1);
21565 SDValue VL = N->getOperand(2);
21566
21567 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
21568 // scalar input.
21569 unsigned ScalarSize = Scalar.getValueSizeInBits();
21570 unsigned EltWidth = VT.getScalarSizeInBits();
21571 if (ScalarSize > EltWidth && Passthru.isUndef())
21572 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
21573 return SDValue(N, 0);
21574
21575 // If VL is 1 and the scalar value won't benefit from immediate, we can
21576 // use vmv.s.x.
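// (A nonzero constant that fits in simm5 is kept as vmv.v.x so it can be
// selected as vmv.v.i.)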
21577 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
21578 if (isOneConstant(VL) &&
21579 (!Const || Const->isZero() ||
21580 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
21581 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
21582
21583 break;
21584 }
21585 case RISCVISD::VFMV_S_F_VL: {
21586 SDValue Src = N->getOperand(1);
21587 // Try to remove vector->scalar->vector if the scalar->vector is inserting
21588 // into an undef vector.
21589 // TODO: Could use a vslide or vmv.v.v for non-undef.
21590 if (N->getOperand(0).isUndef() &&
21591 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21592 isNullConstant(Src.getOperand(1)) &&
21593 Src.getOperand(0).getValueType().isScalableVector()) {
21594 EVT VT = N->getValueType(0);
21595 SDValue EVSrc = Src.getOperand(0);
21596 EVT EVSrcVT = EVSrc.getValueType();
21598 // Widths match, just return the original vector.
21599 if (EVSrcVT == VT)
21600 return EVSrc;
21601 SDLoc DL(N);
21602 // The source is narrower than the result; widen it with insert_subvector.
21603 if (EVSrcVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) {
21604 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
21605 EVSrc,
21606 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
21607 }
21608 // The source is wider than the result; shrink it with extract_subvector.
21609 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, EVSrc,
21610 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
21611 }
21612 [[fallthrough]];
21613 }
21614 case RISCVISD::VMV_S_X_VL: {
21615 const MVT VT = N->getSimpleValueType(0);
21616 SDValue Passthru = N->getOperand(0);
21617 SDValue Scalar = N->getOperand(1);
21618 SDValue VL = N->getOperand(2);
21619
21620 // The vmv.s.x instruction copies the scalar integer register to element 0
21621 // of the destination vector register. If SEW < XLEN, the least-significant
21622 // bits are copied and the upper XLEN-SEW bits are ignored.
21623 unsigned ScalarSize = Scalar.getValueSizeInBits();
21624 unsigned EltWidth = VT.getScalarSizeInBits();
21625 if (ScalarSize > EltWidth && SimplifyDemandedLowBitsHelper(1, EltWidth))
21626 return SDValue(N, 0);
21627
21628 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
21629 Scalar.getOperand(0).getValueType() == N->getValueType(0))
21630 return Scalar.getOperand(0);
21631
21632 // Use M1 or smaller to avoid over-constraining register allocation.
21633 const MVT M1VT = RISCVTargetLowering::getM1VT(VT);
21634 if (M1VT.bitsLT(VT)) {
21635 SDValue M1Passthru = DAG.getExtractSubvector(DL, M1VT, Passthru, 0);
21636 SDValue Result =
21637 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
21638 Result = DAG.getInsertSubvector(DL, Passthru, Result, 0);
21639 return Result;
21640 }
21641
21642 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
21643 // higher would overly constrain the register allocator for
21644 // no purpose.
21645 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
21646 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
21647 VT.bitsLE(RISCVTargetLowering::getM1VT(VT)) && Passthru.isUndef())
21648 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
21649
21650 break;
21651 }
21652 case RISCVISD::VMV_X_S: {
21653 SDValue Vec = N->getOperand(0);
21654 MVT VecVT = N->getOperand(0).getSimpleValueType();
21655 const MVT M1VT = RISCVTargetLowering::getM1VT(VecVT);
21656 if (M1VT.bitsLT(VecVT)) {
21657 Vec = DAG.getExtractSubvector(DL, M1VT, Vec, 0);
21658 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
21659 }
21660 break;
21661 }
21662 case ISD::INTRINSIC_VOID:
21663 case ISD::INTRINSIC_W_CHAIN:
21664 case ISD::INTRINSIC_WO_CHAIN: {
21665 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
21666 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
21667 switch (IntNo) {
21668 // By default we do not combine any intrinsic.
21669 default:
21670 return SDValue();
21671 case Intrinsic::riscv_vcpop:
21672 case Intrinsic::riscv_vcpop_mask:
21673 case Intrinsic::riscv_vfirst:
21674 case Intrinsic::riscv_vfirst_mask: {
21675 SDValue VL = N->getOperand(2);
21676 if (IntNo == Intrinsic::riscv_vcpop_mask ||
21677 IntNo == Intrinsic::riscv_vfirst_mask)
21678 VL = N->getOperand(3);
21679 if (!isNullConstant(VL))
21680 return SDValue();
21681 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
21682 SDLoc DL(N);
21683 EVT VT = N->getValueType(0);
21684 if (IntNo == Intrinsic::riscv_vfirst ||
21685 IntNo == Intrinsic::riscv_vfirst_mask)
21686 return DAG.getAllOnesConstant(DL, VT);
21687 return DAG.getConstant(0, DL, VT);
21688 }
21689 case Intrinsic::riscv_vsseg2_mask:
21690 case Intrinsic::riscv_vsseg3_mask:
21691 case Intrinsic::riscv_vsseg4_mask:
21692 case Intrinsic::riscv_vsseg5_mask:
21693 case Intrinsic::riscv_vsseg6_mask:
21694 case Intrinsic::riscv_vsseg7_mask:
21695 case Intrinsic::riscv_vsseg8_mask: {
21696 SDValue Tuple = N->getOperand(2);
21697 unsigned NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21698
21699 if (Subtarget.hasOptimizedSegmentLoadStore(NF) || !Tuple.hasOneUse() ||
21700 Tuple.getOpcode() != RISCVISD::TUPLE_INSERT ||
21701 !Tuple.getOperand(0).isUndef())
21702 return SDValue();
21703
21704 SDValue Val = Tuple.getOperand(1);
21705 unsigned Idx = Tuple.getConstantOperandVal(2);
21706
21707 unsigned SEW = Val.getValueType().getScalarSizeInBits();
21708 assert(Log2_64(SEW) == N->getConstantOperandVal(6) &&
21709 "Type mismatch without bitcast?");
21710 unsigned Stride = SEW / 8 * NF;
21711 unsigned Offset = SEW / 8 * Idx;
21712
21713 SDValue Ops[] = {
21714 /*Chain=*/N->getOperand(0),
21715 /*IntID=*/
21716 DAG.getTargetConstant(Intrinsic::riscv_vsse_mask, DL, XLenVT),
21717 /*StoredVal=*/Val,
21718 /*Ptr=*/
21719 DAG.getNode(ISD::ADD, DL, XLenVT, N->getOperand(3),
21720 DAG.getConstant(Offset, DL, XLenVT)),
21721 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21722 /*Mask=*/N->getOperand(4),
21723 /*VL=*/N->getOperand(5)};
21724
21725 auto *OldMemSD = cast<MemIntrinsicSDNode>(N);
21726 // Match getTgtMemIntrinsic for non-unit stride case
21727 EVT MemVT = OldMemSD->getMemoryVT().getScalarType();
21728 MachineFunction &MF = DAG.getMachineFunction();
21729 MachineMemOperand *MMO = MF.getMachineMemOperand(
21730 OldMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21731
21732 SDVTList VTs = DAG.getVTList(MVT::Other);
21733 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VTs, Ops, MemVT,
21734 MMO);
21735 }
21736 }
21737 }
21738 case ISD::EXPERIMENTAL_VP_REVERSE:
21739 return performVP_REVERSECombine(N, DAG, Subtarget);
21740 case ISD::VP_STORE:
21741 return performVP_STORECombine(N, DAG, Subtarget);
21742 case ISD::BITCAST: {
21743 assert(Subtarget.useRVVForFixedLengthVectors());
21744 SDValue N0 = N->getOperand(0);
21745 EVT VT = N->getValueType(0);
21746 EVT SrcVT = N0.getValueType();
21747 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
21748 unsigned NF = VT.getRISCVVectorTupleNumFields();
21749 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
21750 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
21751 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
21752
21753 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
21754
21755 SDValue Result = DAG.getUNDEF(VT);
21756 for (unsigned i = 0; i < NF; ++i)
21757 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
21758 DAG.getTargetConstant(i, DL, MVT::i32));
21759 return Result;
21760 }
21761 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
21762 // type, widen both sides to avoid a trip through memory.
21763 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
21764 VT.isScalarInteger()) {
21765 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
21766 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
21767 Ops[0] = N0;
21768 SDLoc DL(N);
21769 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
21770 N0 = DAG.getBitcast(MVT::i8, N0);
21771 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
21772 }
21773
21774 return SDValue();
21775 }
21776 case ISD::VECREDUCE_ADD:
21777 if (SDValue V = performVECREDUCECombine(N, DAG, Subtarget, *this))
21778 return V;
21779 [[fallthrough]];
21780 case ISD::CTPOP:
21781 if (SDValue V = combineToVCPOP(N, DAG, Subtarget))
21782 return V;
21783 break;
21784 case RISCVISD::VRGATHER_VX_VL: {
21785 // Note this assumes that out of bounds indices produce poison
21786 // and can thus be replaced without having to prove them in bounds.
21787 EVT VT = N->getValueType(0);
21788 SDValue Src = N->getOperand(0);
21789 SDValue Idx = N->getOperand(1);
21790 SDValue Passthru = N->getOperand(2);
21791 SDValue VL = N->getOperand(4);
21792
21793 // Warning: Unlike most cases where we strip an insert_subvector, this one
21794 // does not require the first operand to be undef.
21795 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
21796 isNullConstant(Src.getOperand(2)))
21797 Src = Src.getOperand(1);
21798
21799 switch (Src.getOpcode()) {
21800 default:
21801 break;
21802 case RISCVISD::VMV_V_X_VL:
21803 case RISCVISD::VFMV_V_F_VL:
21804 // Drop a redundant vrgather_vx.
21805 // TODO: Remove the type restriction if we find a motivating
21806 // test case?
21807 if (Passthru.isUndef() && VL == Src.getOperand(2) &&
21808 Src.getValueType() == VT)
21809 return Src;
21810 break;
21811 case RISCVISD::VMV_S_X_VL:
21812 case RISCVISD::VFMV_S_F_VL:
21813 // If this use only demands lane zero from the source vmv.s.x, and
21814 // doesn't have a passthru, then this vrgather.vi/vx is equivalent to
21815 // a vmv.v.x. Note that there can be other uses of the original
21816 // vmv.s.x and thus we can't eliminate it. (vfmv.s.f is analogous)
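// e.g. a vrgather with index 0 applied to (vmv.s.x undef, x) reads element 0
// (i.e. x) into every lane, which is exactly vmv.v.x of x.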
21817 if (isNullConstant(Idx) && Passthru.isUndef() &&
21818 VL == Src.getOperand(2)) {
21819 unsigned Opc =
21820 VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
21821 return DAG.getNode(Opc, DL, VT, DAG.getUNDEF(VT), Src.getOperand(1),
21822 VL);
21823 }
21824 break;
21825 }
21826 break;
21827 }
21828 case RISCVISD::TUPLE_EXTRACT: {
21829 EVT VT = N->getValueType(0);
21830 SDValue Tuple = N->getOperand(0);
21831 unsigned Idx = N->getConstantOperandVal(1);
21832 if (!Tuple.hasOneUse() || Tuple.getOpcode() != ISD::INTRINSIC_W_CHAIN)
21833 break;
21834
21835 unsigned NF = 0;
21836 switch (Tuple.getConstantOperandVal(1)) {
21837 default:
21838 break;
21839 case Intrinsic::riscv_vlseg2_mask:
21840 case Intrinsic::riscv_vlseg3_mask:
21841 case Intrinsic::riscv_vlseg4_mask:
21842 case Intrinsic::riscv_vlseg5_mask:
21843 case Intrinsic::riscv_vlseg6_mask:
21844 case Intrinsic::riscv_vlseg7_mask:
21845 case Intrinsic::riscv_vlseg8_mask:
21846 NF = Tuple.getValueType().getRISCVVectorTupleNumFields();
21847 break;
21848 }
21849
21850 if (!NF || Subtarget.hasOptimizedSegmentLoadStore(NF))
21851 break;
21852
21853 unsigned SEW = VT.getScalarSizeInBits();
21854 assert(Log2_64(SEW) == Tuple.getConstantOperandVal(7) &&
21855 "Type mismatch without bitcast?");
21856 unsigned Stride = SEW / 8 * NF;
21857 unsigned Offset = SEW / 8 * Idx;
21858
21859 SDValue Ops[] = {
21860 /*Chain=*/Tuple.getOperand(0),
21861 /*IntID=*/DAG.getTargetConstant(Intrinsic::riscv_vlse_mask, DL, XLenVT),
21862 /*Passthru=*/Tuple.getOperand(2),
21863 /*Ptr=*/
21864 DAG.getNode(ISD::ADD, DL, XLenVT, Tuple.getOperand(3),
21865 DAG.getConstant(Offset, DL, XLenVT)),
21866 /*Stride=*/DAG.getConstant(Stride, DL, XLenVT),
21867 /*Mask=*/Tuple.getOperand(4),
21868 /*VL=*/Tuple.getOperand(5),
21869 /*Policy=*/Tuple.getOperand(6)};
21870
21871 auto *TupleMemSD = cast<MemIntrinsicSDNode>(Tuple);
21872 // Match getTgtMemIntrinsic for non-unit stride case
21873 EVT MemVT = TupleMemSD->getMemoryVT().getScalarType();
21874 MachineFunction &MF = DAG.getMachineFunction();
21875 MachineMemOperand *MMO = MF.getMachineMemOperand(
21876 TupleMemSD->getMemOperand(), Offset, MemoryLocation::UnknownSize);
21877
21878 SDVTList VTs = DAG.getVTList({VT, MVT::Other});
21879 SDValue Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
21880 Ops, MemVT, MMO);
21881 DAG.ReplaceAllUsesOfValueWith(Tuple.getValue(1), Result.getValue(1));
21882 return Result.getValue(0);
21883 }
21884 case RISCVISD::TUPLE_INSERT: {
21885 // tuple_insert tuple, undef, idx -> tuple
21886 if (N->getOperand(1).isUndef())
21887 return N->getOperand(0);
21888 break;
21889 }
21890 case RISCVISD::VMERGE_VL: {
21891 // vmerge_vl allones, x, y, passthru, vl -> vmv_v_v passthru, x, vl
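// (with an all-ones mask every lane takes the true operand, so the merge is
// just a vmv.v.v of x)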
21892 SDValue Mask = N->getOperand(0);
21893 SDValue True = N->getOperand(1);
21894 SDValue Passthru = N->getOperand(3);
21895 SDValue VL = N->getOperand(4);
21896
21897 // Fixed vectors are wrapped in scalable containers, unwrap them.
21898 using namespace SDPatternMatch;
21899 SDValue SubVec;
21900 if (sd_match(Mask, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
21901 Mask = SubVec;
21902
21903 if (!isOneOrOneSplat(Mask))
21904 break;
21905
21906 return DAG.getNode(RISCVISD::VMV_V_V_VL, SDLoc(N), N->getSimpleValueType(0),
21907 Passthru, True, VL);
21908 }
21909 case RISCVISD::VMV_V_V_VL: {
21910 // vmv_v_v passthru, splat(x), vl -> vmv_v_x passthru, x, vl
21911 SDValue Passthru = N->getOperand(0);
21912 SDValue Src = N->getOperand(1);
21913 SDValue VL = N->getOperand(2);
21914
21915 // Fixed vectors are wrapped in scalable containers, unwrap them.
21916 using namespace SDPatternMatch;
21917 SDValue SubVec;
21918 if (sd_match(Src, m_InsertSubvector(m_Undef(), m_Value(SubVec), m_Zero())))
21919 Src = SubVec;
21920
21921 SDValue SplatVal = DAG.getSplatValue(Src);
21922 if (!SplatVal)
21923 break;
21924 MVT VT = N->getSimpleValueType(0);
21925 return lowerScalarSplat(Passthru, SplatVal, VL, VT, SDLoc(N), DAG,
21926 Subtarget);
21927 }
21928 case RISCVISD::VSLIDEDOWN_VL:
21929 case RISCVISD::VSLIDEUP_VL:
21930 if (N->getOperand(1)->isUndef())
21931 return N->getOperand(0);
21932 break;
21933 case RISCVISD::VSLIDE1UP_VL:
21934 case RISCVISD::VFSLIDE1UP_VL: {
21935 using namespace SDPatternMatch;
21936 SDValue SrcVec;
21937 SDLoc DL(N);
21938 MVT VT = N->getSimpleValueType(0);
21939 // If the scalar we're sliding in was extracted from the first element of a
21940 // vector, we can use that vector as the passthru in a normal slideup of 1.
21941 // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
21942 if (!N->getOperand(0).isUndef() ||
21943 !sd_match(N->getOperand(2),
21944 m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
21945 m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
21946 break;
21947
21948 MVT SrcVecVT = SrcVec.getSimpleValueType();
21949 if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
21950 break;
21951 // Adapt the value type of source vector.
21952 if (SrcVecVT.isFixedLengthVector()) {
21953 SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
21954 SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
21955 }
21957 SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
21958 else
21959 SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
21960
21961 return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
21962 DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
21963 N->getOperand(4));
21964 }
21965 }
21966
21967 return SDValue();
21968}
21969
21970 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
21971 EVT XVT, unsigned KeptBits) const {
21972 // For vectors, we don't have a preference.
21973 if (XVT.isVector())
21974 return false;
21975
21976 if (XVT != MVT::i32 && XVT != MVT::i64)
21977 return false;
21978
21979 // We can use sext.w for RV64 or an srai 31 on RV32.
21980 if (KeptBits == 32 || KeptBits == 64)
21981 return true;
21982
21983 // With Zbb we can use sext.h/sext.b.
21984 return Subtarget.hasStdExtZbb() &&
21985 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
21986 KeptBits == 16);
21987}
21988
21989 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
21990 const SDNode *N, CombineLevel Level) const {
21991 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
21992 N->getOpcode() == ISD::SRL) &&
21993 "Expected shift op");
21994
21995 // The following folds are only desirable if `(OP _, c1 << c2)` can be
21996 // materialised in fewer instructions than `(OP _, c1)`:
21997 //
21998 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
21999 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
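// e.g. (shl (add x, 1), 3) -> (add (shl x, 3), 8): both 1 and 8 fit in an
// ADDI immediate, so the fold is allowed by the checks below.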
22000 SDValue N0 = N->getOperand(0);
22001 EVT Ty = N0.getValueType();
22002
22003 // Loads and stores can fold a constant offset into their addressing, so
22004 // when AddNode is only used by loads/stores, the folding optimization
22005 // performed above can still be completed.
22006 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
22007 for (SDNode *Use : X->users()) {
22008 // This use is the one we're on right now. Skip it
22009 if (Use == User || Use->getOpcode() == ISD::SELECT)
22010 continue;
22011 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
22012 return false;
22013 }
22014 return true;
22015 };
22016
22017 if (Ty.isScalarInteger() &&
22018 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
22019 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
22020 return isUsedByLdSt(N0.getNode(), N);
22021
22022 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
22023 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
22024
22025 // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
22026 if (C2 && Subtarget.hasShlAdd(C2->getZExtValue()) && N->hasOneUse() &&
22027 N->user_begin()->getOpcode() == ISD::ADD &&
22028 !isUsedByLdSt(*N->user_begin(), nullptr) &&
22029 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
22030 return false;
22031
22032 if (C1 && C2) {
22033 const APInt &C1Int = C1->getAPIntValue();
22034 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
22035
22036 // We can materialise `c1 << c2` into an add immediate, so it's "free",
22037 // and the combine should happen, to potentially allow further combines
22038 // later.
22039 if (ShiftedC1Int.getSignificantBits() <= 64 &&
22040 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
22041 return true;
22042
22043 // We can materialise `c1` in an add immediate, so it's "free", and the
22044 // combine should be prevented.
22045 if (C1Int.getSignificantBits() <= 64 &&
22046 isLegalAddImmediate(C1Int.getSExtValue()))
22047 return false;
22048
22049 // Neither constant will fit into an immediate, so find materialisation
22050 // costs.
22051 int C1Cost =
22052 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
22053 /*CompressionCost*/ true);
22054 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
22055 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
22056 /*CompressionCost*/ true);
22057
22058 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
22059 // combine should be prevented.
22060 if (C1Cost < ShiftedC1Cost)
22061 return false;
22062 }
22063 }
22064
22065 if (!N0->hasOneUse())
22066 return false;
22067
22068 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
22069 N0->getOperand(0)->getOpcode() == ISD::ADD &&
22070 !N0->getOperand(0)->hasOneUse())
22071 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
22072
22073 return true;
22074}
22075
22076 bool RISCVTargetLowering::targetShrinkDemandedConstant(
22077 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
22078 TargetLoweringOpt &TLO) const {
22079 // Delay this optimization as late as possible.
22080 if (!TLO.LegalOps)
22081 return false;
22082
22083 EVT VT = Op.getValueType();
22084 if (VT.isVector())
22085 return false;
22086
22087 unsigned Opcode = Op.getOpcode();
22088 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
22089 return false;
22090
22091 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
22092 if (!C)
22093 return false;
22094
22095 const APInt &Mask = C->getAPIntValue();
22096
22097 // Clear all non-demanded bits initially.
22098 APInt ShrunkMask = Mask & DemandedBits;
22099
22100 // Try to make a smaller immediate by setting undemanded bits.
22101
22102 APInt ExpandedMask = Mask | ~DemandedBits;
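// e.g. with Mask == 0x7fff and only bits [14:0] demanded, ExpandedMask has
// all upper bits set, so the 0xffff mask below is legal and the AND can be
// selected as zext.h (or slli+srli).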
22103
22104 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
22105 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
22106 };
22107 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
22108 if (NewMask == Mask)
22109 return true;
22110 SDLoc DL(Op);
22111 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
22112 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
22113 Op.getOperand(0), NewC);
22114 return TLO.CombineTo(Op, NewOp);
22115 };
22116
22117 // If the shrunk mask fits in sign extended 12 bits, let the target
22118 // independent code apply it.
22119 if (ShrunkMask.isSignedIntN(12))
22120 return false;
22121
22122 // AND has a few special cases for zext.
22123 if (Opcode == ISD::AND) {
22124 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
22125 // otherwise use SLLI + SRLI.
22126 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
22127 if (IsLegalMask(NewMask))
22128 return UseMask(NewMask);
22129
22130 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
22131 if (VT == MVT::i64) {
22132 APInt NewMask = APInt(64, 0xffffffff);
22133 if (IsLegalMask(NewMask))
22134 return UseMask(NewMask);
22135 }
22136 }
22137
22138 // For the remaining optimizations, we need to be able to make a negative
22139 // number through a combination of mask and undemanded bits.
22140 if (!ExpandedMask.isNegative())
22141 return false;
22142
22143 // Compute the fewest number of bits needed to represent the negative number.
22144 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
22145
22146 // Try to make a 12 bit negative immediate. If that fails try to make a 32
22147 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
22148 // If we can't create a simm12, we shouldn't change opaque constants.
22149 APInt NewMask = ShrunkMask;
22150 if (MinSignedBits <= 12)
22151 NewMask.setBitsFrom(11);
22152 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
22153 NewMask.setBitsFrom(31);
22154 else
22155 return false;
22156
22157 // Check that our new mask is a subset of the demanded mask.
22158 assert(IsLegalMask(NewMask));
22159 return UseMask(NewMask);
22160}
22161
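// Evaluate the GREV/GORC bit permutation (from the non-ratified Zbp proposal)
// on a constant: each set bit in ShAmt swaps (GREV) or ORs together (GORC)
// adjacent bit groups of that width. ShAmt == 7 corresponds to brev8/orc.b,
// e.g. computeGREVOrGORC(0x01, 7, /*IsGORC=*/true) == 0xFF.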
22162static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
22163 static const uint64_t GREVMasks[] = {
22164 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
22165 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
22166
22167 for (unsigned Stage = 0; Stage != 6; ++Stage) {
22168 unsigned Shift = 1 << Stage;
22169 if (ShAmt & Shift) {
22170 uint64_t Mask = GREVMasks[Stage];
22171 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
22172 if (IsGORC)
22173 Res |= x;
22174 x = Res;
22175 }
22176 }
22177
22178 return x;
22179}
22180
22181 void RISCVTargetLowering::computeKnownBitsForTargetNode(SDValue Op,
22182 KnownBits &Known,
22183 const APInt &DemandedElts,
22184 const SelectionDAG &DAG,
22185 unsigned Depth) const {
22186 unsigned BitWidth = Known.getBitWidth();
22187 unsigned Opc = Op.getOpcode();
22192 "Should use MaskedValueIsZero if you don't know whether Op"
22193 " is a target node!");
22194
22195 Known.resetAll();
22196 switch (Opc) {
22197 default: break;
22198 case RISCVISD::SELECT_CC: {
22199 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
22200 // If we don't know any bits, early out.
22201 if (Known.isUnknown())
22202 break;
22203 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
22204
22205 // Only known if known in both the LHS and RHS.
22206 Known = Known.intersectWith(Known2);
22207 break;
22208 }
22209 case RISCVISD::VCPOP_VL: {
22210 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
22211 Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
22212 break;
22213 }
22214 case RISCVISD::CZERO_EQZ:
22215 case RISCVISD::CZERO_NEZ:
22216 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
22217 // Result is either all zero or operand 0. We can propagate zeros, but not
22218 // ones.
22219 Known.One.clearAllBits();
22220 break;
22221 case RISCVISD::REMUW: {
22222 KnownBits Known2;
22223 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
22224 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
22225 // We only care about the lower 32 bits.
22226 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
22227 // Restore the original width by sign extending.
22228 Known = Known.sext(BitWidth);
22229 break;
22230 }
22231 case RISCVISD::DIVUW: {
22232 KnownBits Known2;
22233 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
22234 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
22235 // We only care about the lower 32 bits.
22236 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
22237 // Restore the original width by sign extending.
22238 Known = Known.sext(BitWidth);
22239 break;
22240 }
22241 case RISCVISD::SLLW: {
22242 KnownBits Known2;
22243 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
22244 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
22245 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
22246 // Restore the original width by sign extending.
22247 Known = Known.sext(BitWidth);
22248 break;
22249 }
22250 case RISCVISD::SRLW: {
22251 KnownBits Known2;
22252 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
22253 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
22254 Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32));
22255 // Restore the original width by sign extending.
22256 Known = Known.sext(BitWidth);
22257 break;
22258 }
22259 case RISCVISD::SRAW: {
22260 KnownBits Known2;
22261 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
22262 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
22263 Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
22264 // Restore the original width by sign extending.
22265 Known = Known.sext(BitWidth);
22266 break;
22267 }
22268 case RISCVISD::SHL_ADD: {
22269 KnownBits Known2;
22270 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
22271 unsigned ShAmt = Op.getConstantOperandVal(1);
22272 Known <<= ShAmt;
22273 Known.Zero.setLowBits(ShAmt); // the <<= operator left these bits unknown
22274 Known2 = DAG.computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
22275 Known = KnownBits::add(Known, Known2);
22276 break;
22277 }
22278 case RISCVISD::CTZW: {
22279 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
22280 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
22281 unsigned LowBits = llvm::bit_width(PossibleTZ);
22282 Known.Zero.setBitsFrom(LowBits);
22283 break;
22284 }
22285 case RISCVISD::CLZW: {
22286 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
22287 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
22288 unsigned LowBits = llvm::bit_width(PossibleLZ);
22289 Known.Zero.setBitsFrom(LowBits);
22290 break;
22291 }
22292 case RISCVISD::BREV8:
22293 case RISCVISD::ORC_B: {
22294 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
22295 // control value of 7 is equivalent to brev8 and orc.b.
22296 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
22297 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
22298 // To compute zeros for ORC_B, we need to invert the value and invert it
22299 // back after. This inverting is harmless for BREV8.
22300 Known.Zero =
22301 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
22302 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
22303 break;
22304 }
22305 case RISCVISD::READ_VLENB: {
22306 // We can use the minimum and maximum VLEN values to bound VLENB. We
22307 // know VLEN must be a power of two.
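// e.g. if the subtarget guarantees 128 <= VLEN <= 512, VLENB is a power of
// two in [16, 64], so bits [3:0] and all bits above bit 6 are known zero.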
22308 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
22309 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
22310 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
22311 Known.Zero.setLowBits(Log2_32(MinVLenB));
22312 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
22313 if (MaxVLenB == MinVLenB)
22314 Known.One.setBit(Log2_32(MinVLenB));
22315 break;
22316 }
22317 case RISCVISD::FCLASS: {
22318 // fclass will only set one of the low 10 bits.
22319 Known.Zero.setBitsFrom(10);
22320 break;
22321 }
22322 case ISD::INTRINSIC_W_CHAIN:
22323 case ISD::INTRINSIC_WO_CHAIN: {
22324 unsigned IntNo =
22325 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
22326 switch (IntNo) {
22327 default:
22328 // We can't do anything for most intrinsics.
22329 break;
22330 case Intrinsic::riscv_vsetvli:
22331 case Intrinsic::riscv_vsetvlimax: {
22332 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
22333 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
22334 RISCVVType::VLMUL VLMUL =
22335 static_cast<RISCVVType::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
22336 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
22337 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
22338 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
22339 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
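// e.g. on a subtarget with VLEN <= 512, vsetvlimax with SEW=8 and LMUL=8
// gives MaxVL = (512 / 8) * 8 = 512, so bits 10 and above of the result are
// known zero.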
22340
22341 // The result of vsetvli must not be larger than AVL.
22342 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
22343 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
22344
22345 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
22346 if (BitWidth > KnownZeroFirstBit)
22347 Known.Zero.setBitsFrom(KnownZeroFirstBit);
22348 break;
22349 }
22350 }
22351 break;
22352 }
22353 }
22354}
22355
22356 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
22357 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
22358 unsigned Depth) const {
22359 switch (Op.getOpcode()) {
22360 default:
22361 break;
22362 case RISCVISD::SELECT_CC: {
22363 unsigned Tmp =
22364 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
22365 if (Tmp == 1) return 1; // Early out.
22366 unsigned Tmp2 =
22367 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
22368 return std::min(Tmp, Tmp2);
22369 }
22370 case RISCVISD::CZERO_EQZ:
22371 case RISCVISD::CZERO_NEZ:
22372 // Output is either all zero or operand 0. We can propagate sign bit count
22373 // from operand 0.
22374 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
22375 case RISCVISD::NEGW_MAX: {
22376 // We expand this at isel to negw+max. The result will have 33 sign bits
22377 // if the input has at least 33 sign bits.
22378 unsigned Tmp =
22379 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
22380 if (Tmp < 33) return 1;
22381 return 33;
22382 }
22383 case RISCVISD::SRAW: {
22384 unsigned Tmp =
22385 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
22386 // sraw produces at least 33 sign bits. If the input already has more than
22387 // 33 sign bits, sraw will preserve them.
22388 // TODO: A more precise answer could be calculated depending on known bits
22389 // in the shift amount.
22390 return std::max(Tmp, 33U);
22391 }
22392 case RISCVISD::SLLW:
22393 case RISCVISD::SRLW:
22394 case RISCVISD::DIVW:
22395 case RISCVISD::DIVUW:
22396 case RISCVISD::REMUW:
22397 case RISCVISD::ROLW:
22398 case RISCVISD::RORW:
22399 case RISCVISD::ABSW:
22400 case RISCVISD::FCVT_W_RV64:
22401 case RISCVISD::FCVT_WU_RV64:
22402 case RISCVISD::STRICT_FCVT_W_RV64:
22403 case RISCVISD::STRICT_FCVT_WU_RV64:
22404 // TODO: As the result is sign-extended, this is conservatively correct.
22405 return 33;
22406 case RISCVISD::VMV_X_S: {
22407 // The number of sign bits of the scalar result is computed by obtaining the
22408 // element type of the input vector operand, subtracting its width from the
22409 // XLEN, and then adding one (sign bit within the element type). If the
22410 // element type is wider than XLen, the least-significant XLEN bits are
22411 // taken.
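// e.g. extracting from a vector of i8 elements on RV64 yields at least
// 64 - 8 + 1 = 57 sign bits.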
22412 unsigned XLen = Subtarget.getXLen();
22413 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
22414 if (EltBits <= XLen)
22415 return XLen - EltBits + 1;
22416 break;
22417 }
22418 case ISD::INTRINSIC_W_CHAIN: {
22419 unsigned IntNo = Op.getConstantOperandVal(1);
22420 switch (IntNo) {
22421 default:
22422 break;
22423 case Intrinsic::riscv_masked_atomicrmw_xchg:
22424 case Intrinsic::riscv_masked_atomicrmw_add:
22425 case Intrinsic::riscv_masked_atomicrmw_sub:
22426 case Intrinsic::riscv_masked_atomicrmw_nand:
22427 case Intrinsic::riscv_masked_atomicrmw_max:
22428 case Intrinsic::riscv_masked_atomicrmw_min:
22429 case Intrinsic::riscv_masked_atomicrmw_umax:
22430 case Intrinsic::riscv_masked_atomicrmw_umin:
22431 case Intrinsic::riscv_masked_cmpxchg:
22432 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
22433 // narrow atomic operation. These are implemented using atomic
22434 // operations at the minimum supported atomicrmw/cmpxchg width whose
22435 // result is then sign extended to XLEN. With +A, the minimum width is
22436 // 32 for both RV64 and RV32.
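// Sign-extending from bit 31 leaves XLEN - 32 + 1 sign bits, hence the
// "- 31" below (33 sign bits on RV64).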
22438 assert(Subtarget.hasStdExtZalrsc());
22439 return Op.getValueSizeInBits() - 31;
22440 }
22441 break;
22442 }
22443 }
22444
22445 return 1;
22446}
22447
22448 bool RISCVTargetLowering::SimplifyDemandedBitsForTargetNode(
22449 SDValue Op, const APInt &OriginalDemandedBits,
22450 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
22451 unsigned Depth) const {
22452 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
22453
22454 switch (Op.getOpcode()) {
22455 case RISCVISD::BREV8:
22456 case RISCVISD::ORC_B: {
22457 KnownBits Known2;
22458 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
22459 // For BREV8, we need to do BREV8 on the demanded bits.
22460 // For ORC_B, any bit in the output demands all bits from the same byte.
22461 // So we need to do ORC_B on the demanded bits.
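// e.g. if only bit 0 of an orc.b result is demanded, every bit of the low
// byte of the input is demanded.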
22462 APInt DemandedBits =
22463 APInt(BitWidth, computeGREVOrGORC(OriginalDemandedBits.getZExtValue(),
22464 7, IsGORC));
22465 if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits,
22466 OriginalDemandedElts, Known2, TLO, Depth + 1))
22467 return true;
22468
22469 // To compute zeros for ORC_B, we need to invert the value and invert it
22470 // back after. This inverting is harmless for BREV8.
22471 Known.Zero = ~computeGREVOrGORC(~Known2.Zero.getZExtValue(), 7, IsGORC);
22472 Known.One = computeGREVOrGORC(Known2.One.getZExtValue(), 7, IsGORC);
22473 return false;
22474 }
22475 }
22476
22477 return TargetLowering::SimplifyDemandedBitsForTargetNode(
22478 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
22479}
22480
22481 bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
22482 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
22483 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
22484
22485 // TODO: Add more target nodes.
22486 switch (Op.getOpcode()) {
22487 case RISCVISD::SLLW:
22488 case RISCVISD::SRAW:
22489 case RISCVISD::SRLW:
22490 case RISCVISD::RORW:
22491 case RISCVISD::ROLW:
22492 // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
22493 // amount is in bounds.
22494 return false;
22495 case RISCVISD::SELECT_CC:
22496 // Integer comparisons cannot create poison.
22497 assert(Op.getOperand(0).getValueType().isInteger() &&
22498 "RISCVISD::SELECT_CC only compares integers");
22499 return false;
22500 }
22501 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
22502 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
22503}
22504
22505const Constant *
22507 assert(Ld && "Unexpected null LoadSDNode");
22508 if (!ISD::isNormalLoad(Ld))
22509 return nullptr;
22510
22511 SDValue Ptr = Ld->getBasePtr();
22512
22513 // Only constant pools with no offset are supported.
22514 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
22515 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
22516 if (!CNode || CNode->isMachineConstantPoolEntry() ||
22517 CNode->getOffset() != 0)
22518 return nullptr;
22519
22520 return CNode;
22521 };
22522
22523 // Simple case, LLA.
22524 if (Ptr.getOpcode() == RISCVISD::LLA) {
22525 auto *CNode = GetSupportedConstantPool(Ptr.getOperand(0));
22526 if (!CNode || CNode->getTargetFlags() != 0)
22527 return nullptr;
22528
22529 return CNode->getConstVal();
22530 }
22531
22532 // Look for a HI and ADD_LO pair.
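// (i.e. the lui %hi(...) / addi %lo(...) sequence used to address the
// constant pool)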
22533 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
22534 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
22535 return nullptr;
22536
22537 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
22538 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
22539
22540 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
22541 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
22542 return nullptr;
22543
22544 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
22545 return nullptr;
22546
22547 return CNodeLo->getConstVal();
22548}
22549
22550 static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
22551 MachineBasicBlock *BB) {
22552 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
22553
22554 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
22555 // Should the count have wrapped while it was being read, we need to try
22556 // again.
22557 // For example:
22558 // ```
22559 // read:
22560 // csrrs x3, counterh # load high word of counter
22561 // csrrs x2, counter # load low word of counter
22562 // csrrs x4, counterh # load high word of counter
22563 // bne x3, x4, read # check if high word reads match, otherwise try again
22564 // ```
22565
22566 MachineFunction &MF = *BB->getParent();
22567 const BasicBlock *LLVMBB = BB->getBasicBlock();
22568 MachineFunction::iterator It = ++BB->getIterator();
22569
22570 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
22571 MF.insert(It, LoopMBB);
22572
22573 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
22574 MF.insert(It, DoneMBB);
22575
22576 // Transfer the remainder of BB and its successor edges to DoneMBB.
22577 DoneMBB->splice(DoneMBB->begin(), BB,
22578 std::next(MachineBasicBlock::iterator(MI)), BB->end());
22579 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
22580
22581 BB->addSuccessor(LoopMBB);
22582
22583 MachineRegisterInfo &RegInfo = MF.getRegInfo();
22584 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
22585 Register LoReg = MI.getOperand(0).getReg();
22586 Register HiReg = MI.getOperand(1).getReg();
22587 int64_t LoCounter = MI.getOperand(2).getImm();
22588 int64_t HiCounter = MI.getOperand(3).getImm();
22589 DebugLoc DL = MI.getDebugLoc();
22590
22591 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
22592 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
22593 .addImm(HiCounter)
22594 .addReg(RISCV::X0);
22595 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
22596 .addImm(LoCounter)
22597 .addReg(RISCV::X0);
22598 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
22599 .addImm(HiCounter)
22600 .addReg(RISCV::X0);
22601
22602 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
22603 .addReg(HiReg)
22604 .addReg(ReadAgainReg)
22605 .addMBB(LoopMBB);
22606
22607 LoopMBB->addSuccessor(LoopMBB);
22608 LoopMBB->addSuccessor(DoneMBB);
22609
22610 MI.eraseFromParent();
22611
22612 return DoneMBB;
22613}
22614
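// Split an FPR64 value into two GPRs on RV32 by spilling the double to a
// stack slot and reloading the low and high words with two LW instructions.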
22615 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
22616 MachineBasicBlock *BB,
22617 const RISCVSubtarget &Subtarget) {
22618 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
22619
22620 MachineFunction &MF = *BB->getParent();
22621 DebugLoc DL = MI.getDebugLoc();
22622 const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
22623 Register LoReg = MI.getOperand(0).getReg();
22624 Register HiReg = MI.getOperand(1).getReg();
22625 Register SrcReg = MI.getOperand(2).getReg();
22626
22627 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
22628 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
22629
22630 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
22631 Register());
22633 MachineMemOperand *MMOLo =
22637 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
22638 .addFrameIndex(FI)
22639 .addImm(0)
22640 .addMemOperand(MMOLo);
22641 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
22642 .addFrameIndex(FI)
22643 .addImm(4)
22644 .addMemOperand(MMOHi);
22645 MI.eraseFromParent(); // The pseudo instruction is gone now.
22646 return BB;
22647}
22648
22649 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
22650 MachineBasicBlock *BB,
22651 const RISCVSubtarget &Subtarget) {
22652 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
22653 "Unexpected instruction");
22654
22655 MachineFunction &MF = *BB->getParent();
22656 DebugLoc DL = MI.getDebugLoc();
22657 const RISCVInstrInfo &TII = *MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
22658 Register DstReg = MI.getOperand(0).getReg();
22659 Register LoReg = MI.getOperand(1).getReg();
22660 Register HiReg = MI.getOperand(2).getReg();
22661
22662 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
22663 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
22664
22666 MachineMemOperand *MMOLo =
22670 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
22671 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
22672 .addFrameIndex(FI)
22673 .addImm(0)
22674 .addMemOperand(MMOLo);
22675 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
22676 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
22677 .addFrameIndex(FI)
22678 .addImm(4)
22679 .addMemOperand(MMOHi);
22680 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, Register());
22681 MI.eraseFromParent(); // The pseudo instruction is gone now.
22682 return BB;
22683}
22684
22685 static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
22686 unsigned RelOpcode, unsigned EqOpcode,
22687 const RISCVSubtarget &Subtarget) {
22688 DebugLoc DL = MI.getDebugLoc();
22689 Register DstReg = MI.getOperand(0).getReg();
22690 Register Src1Reg = MI.getOperand(1).getReg();
22691 Register Src2Reg = MI.getOperand(2).getReg();
22693 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
22695
22696 // Save the current FFLAGS.
22697 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
22698
22699 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
22700 .addReg(Src1Reg)
22701 .addReg(Src2Reg);
22702 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22703 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
22704
22705 // Restore the FFLAGS.
22706 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
22707 .addReg(SavedFFlags, RegState::Kill);
22708
22709 // Issue a dummy FEQ opcode to raise an exception for signaling NaNs.
22710 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
22711 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
22712 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
22713 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
22714 MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
22715
22716 // Erase the pseudoinstruction.
22717 MI.eraseFromParent();
22718 return BB;
22719}
22720
22721static MachineBasicBlock *
22722 EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
22723 MachineBasicBlock *ThisMBB,
22724 const RISCVSubtarget &Subtarget) {
22725 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22726 // Without this, custom-inserter would have generated:
22727 //
22728 // A
22729 // | \
22730 // | B
22731 // | /
22732 // C
22733 // | \
22734 // | D
22735 // | /
22736 // E
22737 //
22738 // A: X = ...; Y = ...
22739 // B: empty
22740 // C: Z = PHI [X, A], [Y, B]
22741 // D: empty
22742 // E: PHI [X, C], [Z, D]
22743 //
22744 // If we lower both Select_FPRX_ in a single step, we can instead generate:
22745 //
22746 // A
22747 // | \
22748 // | C
22749 // | /|
22750 // |/ |
22751 // | |
22752 // | D
22753 // | /
22754 // E
22755 //
22756 // A: X = ...; Y = ...
22757 // D: empty
22758 // E: PHI [X, A], [X, C], [Y, D]
22759
22760 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22761 const DebugLoc &DL = First.getDebugLoc();
22762 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
22763 MachineFunction *F = ThisMBB->getParent();
22764 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
22765 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
22766 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
22767 MachineFunction::iterator It = ++ThisMBB->getIterator();
22768 F->insert(It, FirstMBB);
22769 F->insert(It, SecondMBB);
22770 F->insert(It, SinkMBB);
22771
22772 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
22773 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
22774 std::next(MachineBasicBlock::iterator(First)),
22775 ThisMBB->end());
22776 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
22777
22778 // Fallthrough block for ThisMBB.
22779 ThisMBB->addSuccessor(FirstMBB);
22780 // Fallthrough block for FirstMBB.
22781 FirstMBB->addSuccessor(SecondMBB);
22782 ThisMBB->addSuccessor(SinkMBB);
22783 FirstMBB->addSuccessor(SinkMBB);
22784 // This is fallthrough.
22785 SecondMBB->addSuccessor(SinkMBB);
22786
22787 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
22788 Register FLHS = First.getOperand(1).getReg();
22789 Register FRHS = First.getOperand(2).getReg();
22790 // Insert appropriate branch.
22791 BuildMI(FirstMBB, DL, TII.get(RISCVCC::getBrCond(FirstCC, First.getOpcode())))
22792 .addReg(FLHS)
22793 .addReg(FRHS)
22794 .addMBB(SinkMBB);
22795
22796 Register SLHS = Second.getOperand(1).getReg();
22797 Register SRHS = Second.getOperand(2).getReg();
22798 Register Op1Reg4 = First.getOperand(4).getReg();
22799 Register Op1Reg5 = First.getOperand(5).getReg();
22800
22801 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
22802 // Insert appropriate branch.
22803 BuildMI(ThisMBB, DL,
22804 TII.get(RISCVCC::getBrCond(SecondCC, Second.getOpcode())))
22805 .addReg(SLHS)
22806 .addReg(SRHS)
22807 .addMBB(SinkMBB);
22808
22809 Register DestReg = Second.getOperand(0).getReg();
22810 Register Op2Reg4 = Second.getOperand(4).getReg();
22811 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
22812 .addReg(Op2Reg4)
22813 .addMBB(ThisMBB)
22814 .addReg(Op1Reg4)
22815 .addMBB(FirstMBB)
22816 .addReg(Op1Reg5)
22817 .addMBB(SecondMBB);
22818
22819 // Now remove the Select_FPRX_s.
22820 First.eraseFromParent();
22821 Second.eraseFromParent();
22822 return SinkMBB;
22823}
22824
22825 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
22826 MachineBasicBlock *BB,
22827 const RISCVSubtarget &Subtarget) {
22828 // To "insert" Select_* instructions, we actually have to insert the triangle
22829 // control-flow pattern. The incoming instructions know the destination vreg
22830 // to set, the condition code register to branch on, the true/false values to
22831 // select between, and the condcode to use to select the appropriate branch.
22832 //
22833 // We produce the following control flow:
22834 // HeadMBB
22835 // | \
22836 // | IfFalseMBB
22837 // | /
22838 // TailMBB
22839 //
22840 // When we find a sequence of selects we attempt to optimize their emission
22841 // by sharing the control flow. Currently we only handle cases where we have
22842 // multiple selects with the exact same condition (same LHS, RHS and CC).
22843 // The selects may be interleaved with other instructions if the other
22844 // instructions meet some requirements we deem safe:
22845 // - They are not pseudo instructions.
22846 // - They are debug instructions. Otherwise,
22847 // - They do not have side-effects, do not access memory and their inputs do
22848 // not depend on the results of the select pseudo-instructions.
22849 // - They don't adjust stack.
22850 // The TrueV/FalseV operands of the selects cannot depend on the result of
22851 // previous selects in the sequence.
22852 // These conditions could be further relaxed. See the X86 target for a
22853 // related approach and more information.
22854 //
22855 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
22856 // is checked here and handled by a separate function -
22857 // EmitLoweredCascadedSelect.
22858
22859 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
22860 if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
22861 MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
22862 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
22863 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
22864 Next->getOperand(5).isKill())
22865 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
22866
22867 Register LHS = MI.getOperand(1).getReg();
22868 Register RHS;
22869 if (MI.getOperand(2).isReg())
22870 RHS = MI.getOperand(2).getReg();
22871 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
22872
22873 SmallVector<MachineInstr *, 4> SelectDebugValues;
22874 SmallSet<Register, 4> SelectDests;
22875 SelectDests.insert(MI.getOperand(0).getReg());
22876
22877 MachineInstr *LastSelectPseudo = &MI;
22878 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
22879
22880 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
22881 SequenceMBBI != E; ++SequenceMBBI) {
22882 if (SequenceMBBI->isDebugInstr())
22883 continue;
22884 if (RISCVInstrInfo::isSelectPseudo(*SequenceMBBI)) {
22885 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
22886 !SequenceMBBI->getOperand(2).isReg() ||
22887 SequenceMBBI->getOperand(2).getReg() != RHS ||
22888 SequenceMBBI->getOperand(3).getImm() != CC ||
22889 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
22890 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
22891 break;
22892 LastSelectPseudo = &*SequenceMBBI;
22893 SequenceMBBI->collectDebugValues(SelectDebugValues);
22894 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
22895 continue;
22896 }
22897 if (SequenceMBBI->hasUnmodeledSideEffects() ||
22898 SequenceMBBI->mayLoadOrStore() ||
22899 SequenceMBBI->usesCustomInsertionHook() ||
22900 TII.isFrameInstr(*SequenceMBBI) ||
22901 SequenceMBBI->isStackAligningInlineAsm())
22902 break;
22903 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
22904 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
22905 }))
22906 break;
22907 }
22908
22909 const BasicBlock *LLVM_BB = BB->getBasicBlock();
22910 DebugLoc DL = MI.getDebugLoc();
22911 MachineFunction::iterator I = ++BB->getIterator();
22912
22913 MachineBasicBlock *HeadMBB = BB;
22914 MachineFunction *F = BB->getParent();
22915 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
22916 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
22917
22918 F->insert(I, IfFalseMBB);
22919 F->insert(I, TailMBB);
22920
22921 // Set the call frame size on entry to the new basic blocks.
22922 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
22923 IfFalseMBB->setCallFrameSize(CallFrameSize);
22924 TailMBB->setCallFrameSize(CallFrameSize);
22925
22926 // Transfer debug instructions associated with the selects to TailMBB.
22927 for (MachineInstr *DebugInstr : SelectDebugValues) {
22928 TailMBB->push_back(DebugInstr->removeFromParent());
22929 }
22930
22931 // Move all instructions after the sequence to TailMBB.
22932 TailMBB->splice(TailMBB->end(), HeadMBB,
22933 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
22934 // Update machine-CFG edges by transferring all successors of the current
22935 // block to the new block which will contain the Phi nodes for the selects.
22936 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
22937 // Set the successors for HeadMBB.
22938 HeadMBB->addSuccessor(IfFalseMBB);
22939 HeadMBB->addSuccessor(TailMBB);
22940
22941 // Insert appropriate branch.
22942 if (MI.getOperand(2).isImm())
22943 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22944 .addReg(LHS)
22945 .addImm(MI.getOperand(2).getImm())
22946 .addMBB(TailMBB);
22947 else
22948 BuildMI(HeadMBB, DL, TII.get(RISCVCC::getBrCond(CC, MI.getOpcode())))
22949 .addReg(LHS)
22950 .addReg(RHS)
22951 .addMBB(TailMBB);
22952
22953 // IfFalseMBB just falls through to TailMBB.
22954 IfFalseMBB->addSuccessor(TailMBB);
22955
22956 // Create PHIs for all of the select pseudo-instructions.
22957 auto SelectMBBI = MI.getIterator();
22958 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
22959 auto InsertionPoint = TailMBB->begin();
22960 while (SelectMBBI != SelectEnd) {
22961 auto Next = std::next(SelectMBBI);
22962 if (RISCVInstrInfo::isSelectPseudo(*SelectMBBI)) {
22963 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
22964 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
22965 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
22966 .addReg(SelectMBBI->getOperand(4).getReg())
22967 .addMBB(HeadMBB)
22968 .addReg(SelectMBBI->getOperand(5).getReg())
22969 .addMBB(IfFalseMBB);
22970 SelectMBBI->eraseFromParent();
22971 }
22972 SelectMBBI = Next;
22973 }
22974
22975 F->getProperties().resetNoPHIs();
22976 return TailMBB;
22977}
22978
22979// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
22980static const RISCV::RISCVMaskedPseudoInfo *
22981lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW) {
22982 const RISCVVInversePseudosTable::PseudoInfo *Inverse =
22983 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
22984 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
22985 const RISCV::RISCVMaskedPseudoInfo *Masked =
22986 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
22987 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
22988 return Masked;
22989}
22990
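// Lower a masked VFCVT-based rounding pseudo without perturbing FFLAGS: save
// FFLAGS, convert to integer and back using the dynamic rounding mode, then
// restore FFLAGS so any flags raised by the round-trip are discarded.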
22991 static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
22992 MachineBasicBlock *BB,
22993 unsigned CVTXOpc) {
22994 DebugLoc DL = MI.getDebugLoc();
22995
22997
22999 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
23000
23001 // Save the old value of FFLAGS.
23002 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
23003
23004 assert(MI.getNumOperands() == 7);
23005
23006 // Emit a VFCVT_X_F
23007 const TargetRegisterInfo *TRI =
23008 BB->getParent()->getSubtarget().getRegisterInfo();
23009 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
23010 Register Tmp = MRI.createVirtualRegister(RC);
23011 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
23012 .add(MI.getOperand(1))
23013 .add(MI.getOperand(2))
23014 .add(MI.getOperand(3))
23015 .add(MachineOperand::CreateImm(7)) // frm = DYN
23016 .add(MI.getOperand(4))
23017 .add(MI.getOperand(5))
23018 .add(MI.getOperand(6))
23019 .add(MachineOperand::CreateReg(RISCV::FRM,
23020 /*IsDef*/ false,
23021 /*IsImp*/ true));
23022
23023 // Emit a VFCVT_F_X
23024 RISCVVType::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
23025 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
23026 // There is no E8 variant for VFCVT_F_X.
23027 assert(Log2SEW >= 4);
23028 unsigned CVTFOpc =
23029 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
23030 ->MaskedPseudo;
23031
23032 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
23033 .add(MI.getOperand(0))
23034 .add(MI.getOperand(1))
23035 .addReg(Tmp)
23036 .add(MI.getOperand(3))
23037 .add(MachineOperand::CreateImm(7)) // frm = DYN
23038 .add(MI.getOperand(4))
23039 .add(MI.getOperand(5))
23040 .add(MI.getOperand(6))
23041 .add(MachineOperand::CreateReg(RISCV::FRM,
23042 /*IsDef*/ false,
23043 /*IsImp*/ true));
23044
23045 // Restore FFLAGS.
23046 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
23047 .addReg(SavedFFLAGS, RegState::Kill);
23048
23049 // Erase the pseudoinstruction.
23050 MI.eraseFromParent();
23051 return BB;
23052}
23053
23054 static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
23055 const RISCVSubtarget &Subtarget) {
23056 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
23057 const TargetRegisterClass *RC;
23058 switch (MI.getOpcode()) {
23059 default:
23060 llvm_unreachable("Unexpected opcode");
23061 case RISCV::PseudoFROUND_H:
23062 CmpOpc = RISCV::FLT_H;
23063 F2IOpc = RISCV::FCVT_W_H;
23064 I2FOpc = RISCV::FCVT_H_W;
23065 FSGNJOpc = RISCV::FSGNJ_H;
23066 FSGNJXOpc = RISCV::FSGNJX_H;
23067 RC = &RISCV::FPR16RegClass;
23068 break;
23069 case RISCV::PseudoFROUND_H_INX:
23070 CmpOpc = RISCV::FLT_H_INX;
23071 F2IOpc = RISCV::FCVT_W_H_INX;
23072 I2FOpc = RISCV::FCVT_H_W_INX;
23073 FSGNJOpc = RISCV::FSGNJ_H_INX;
23074 FSGNJXOpc = RISCV::FSGNJX_H_INX;
23075 RC = &RISCV::GPRF16RegClass;
23076 break;
23077 case RISCV::PseudoFROUND_S:
23078 CmpOpc = RISCV::FLT_S;
23079 F2IOpc = RISCV::FCVT_W_S;
23080 I2FOpc = RISCV::FCVT_S_W;
23081 FSGNJOpc = RISCV::FSGNJ_S;
23082 FSGNJXOpc = RISCV::FSGNJX_S;
23083 RC = &RISCV::FPR32RegClass;
23084 break;
23085 case RISCV::PseudoFROUND_S_INX:
23086 CmpOpc = RISCV::FLT_S_INX;
23087 F2IOpc = RISCV::FCVT_W_S_INX;
23088 I2FOpc = RISCV::FCVT_S_W_INX;
23089 FSGNJOpc = RISCV::FSGNJ_S_INX;
23090 FSGNJXOpc = RISCV::FSGNJX_S_INX;
23091 RC = &RISCV::GPRF32RegClass;
23092 break;
23093 case RISCV::PseudoFROUND_D:
23094 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
23095 CmpOpc = RISCV::FLT_D;
23096 F2IOpc = RISCV::FCVT_L_D;
23097 I2FOpc = RISCV::FCVT_D_L;
23098 FSGNJOpc = RISCV::FSGNJ_D;
23099 FSGNJXOpc = RISCV::FSGNJX_D;
23100 RC = &RISCV::FPR64RegClass;
23101 break;
23102 case RISCV::PseudoFROUND_D_INX:
23103 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
23104 CmpOpc = RISCV::FLT_D_INX;
23105 F2IOpc = RISCV::FCVT_L_D_INX;
23106 I2FOpc = RISCV::FCVT_D_L_INX;
23107 FSGNJOpc = RISCV::FSGNJ_D_INX;
23108 FSGNJXOpc = RISCV::FSGNJX_D_INX;
23109 RC = &RISCV::GPRRegClass;
23110 break;
23111 }
23112
23113 const BasicBlock *BB = MBB->getBasicBlock();
23114 DebugLoc DL = MI.getDebugLoc();
23115 MachineFunction::iterator I = ++MBB->getIterator();
23116
23117 MachineFunction *F = MBB->getParent();
23118 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
23119 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
23120
23121 F->insert(I, CvtMBB);
23122 F->insert(I, DoneMBB);
23123 // Move all instructions after the sequence to DoneMBB.
23124 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
23125 MBB->end());
23126 // Update machine-CFG edges by transferring all successors of the current
23127 // block to the new block which will contain the Phi nodes for the selects.
23128 DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
23129 // Set the successors for MBB.
23130 MBB->addSuccessor(CvtMBB);
23131 MBB->addSuccessor(DoneMBB);
23132
23133 Register DstReg = MI.getOperand(0).getReg();
23134 Register SrcReg = MI.getOperand(1).getReg();
23135 Register MaxReg = MI.getOperand(2).getReg();
23136 int64_t FRM = MI.getOperand(3).getImm();
23137
23138 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
23139 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
23140
23141 Register FabsReg = MRI.createVirtualRegister(RC);
23142 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
23143
23144 // Compare the FP value to the max value.
23145 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
23146 auto MIB =
23147 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
23148 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
23149 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
23150
23151 // Insert branch.
23152 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
23153 .addReg(CmpReg)
23154 .addReg(RISCV::X0)
23155 .addMBB(DoneMBB);
23156
23157 CvtMBB->addSuccessor(DoneMBB);
23158
23159 // Convert to integer.
23160 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
23161 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
23162 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
23163 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
23164
23165 // Convert back to FP.
23166 Register I2FReg = MRI.createVirtualRegister(RC);
23167 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
23168 if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
23169 MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
23170
23171 // Restore the sign bit.
23172 Register CvtReg = MRI.createVirtualRegister(RC);
23173 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
23174
23175 // Merge the results.
23176 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
23177 .addReg(SrcReg)
23178 .addMBB(MBB)
23179 .addReg(CvtReg)
23180 .addMBB(CvtMBB);
23181
23182 MI.eraseFromParent();
23183 return DoneMBB;
23184}
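// Machine CFG produced by the expansion above (sketch):
//   MBB:     %fabs = FSGNJX src, src      ; |src|
//            %cmp  = FLT    %fabs, max
//            BEQ   %cmp, x0, DoneMBB      ; not below max (or NaN): keep src
//   CvtMBB:  %i    = F2I src, frm
//            %f    = I2F %i, frm
//            %cvt  = FSGNJ %f, src        ; reapply the original sign
//   DoneMBB: %dst  = PHI [src, MBB], [%cvt, CvtMBB]
// "max" is the pseudo's third operand; when |src| is not less than it (or src
// is NaN) the conversion is skipped and src is returned unchanged.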
23185
23186MachineBasicBlock *
23187RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
23188 MachineBasicBlock *BB) const {
23189 switch (MI.getOpcode()) {
23190 default:
23191 llvm_unreachable("Unexpected instr type to insert");
23192 case RISCV::ReadCounterWide:
23193 assert(!Subtarget.is64Bit() &&
23194 "ReadCounterWide is only to be used on riscv32");
23195 return emitReadCounterWidePseudo(MI, BB);
23196 case RISCV::Select_GPR_Using_CC_GPR:
23197 case RISCV::Select_GPR_Using_CC_Imm5_Zibi:
23198 case RISCV::Select_GPR_Using_CC_SImm5_CV:
23199 case RISCV::Select_GPRNoX0_Using_CC_SImm5NonZero_QC:
23200 case RISCV::Select_GPRNoX0_Using_CC_UImm5NonZero_QC:
23201 case RISCV::Select_GPRNoX0_Using_CC_SImm16NonZero_QC:
23202 case RISCV::Select_GPRNoX0_Using_CC_UImm16NonZero_QC:
23203 case RISCV::Select_GPR_Using_CC_UImmLog2XLen_NDS:
23204 case RISCV::Select_GPR_Using_CC_UImm7_NDS:
23205 case RISCV::Select_FPR16_Using_CC_GPR:
23206 case RISCV::Select_FPR16INX_Using_CC_GPR:
23207 case RISCV::Select_FPR32_Using_CC_GPR:
23208 case RISCV::Select_FPR32INX_Using_CC_GPR:
23209 case RISCV::Select_FPR64_Using_CC_GPR:
23210 case RISCV::Select_FPR64INX_Using_CC_GPR:
23211 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
23212 return emitSelectPseudo(MI, BB, Subtarget);
23213 case RISCV::BuildPairF64Pseudo:
23214 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
23215 case RISCV::SplitF64Pseudo:
23216 return emitSplitF64Pseudo(MI, BB, Subtarget);
23217 case RISCV::PseudoQuietFLE_H:
23218 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
23219 case RISCV::PseudoQuietFLE_H_INX:
23220 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
23221 case RISCV::PseudoQuietFLT_H:
23222 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
23223 case RISCV::PseudoQuietFLT_H_INX:
23224 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
23225 case RISCV::PseudoQuietFLE_S:
23226 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
23227 case RISCV::PseudoQuietFLE_S_INX:
23228 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
23229 case RISCV::PseudoQuietFLT_S:
23230 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
23231 case RISCV::PseudoQuietFLT_S_INX:
23232 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
23233 case RISCV::PseudoQuietFLE_D:
23234 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
23235 case RISCV::PseudoQuietFLE_D_INX:
23236 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
23237 case RISCV::PseudoQuietFLE_D_IN32X:
23238 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
23239 Subtarget);
23240 case RISCV::PseudoQuietFLT_D:
23241 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
23242 case RISCV::PseudoQuietFLT_D_INX:
23243 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
23244 case RISCV::PseudoQuietFLT_D_IN32X:
23245 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
23246 Subtarget);
23247
23248 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
23249 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
23250 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
23251 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
23252 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
23253 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
23254 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
23255 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
23256 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
23257 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
23258 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
23259 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
23260 case RISCV::PseudoFROUND_H:
23261 case RISCV::PseudoFROUND_H_INX:
23262 case RISCV::PseudoFROUND_S:
23263 case RISCV::PseudoFROUND_S_INX:
23264 case RISCV::PseudoFROUND_D:
23265 case RISCV::PseudoFROUND_D_INX:
23266 case RISCV::PseudoFROUND_D_IN32X:
23267 return emitFROUND(MI, BB, Subtarget);
23268 case RISCV::PROBED_STACKALLOC_DYN:
23269 return emitDynamicProbedAlloc(MI, BB);
23270 case TargetOpcode::STATEPOINT:
23271 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
23272 // while the jal call instruction (to which the statepoint is lowered in the
23273 // end) has an implicit def. This def is early-clobber as it is written at
23274 // the moment of the call, before any use is read.
23275 // Add this implicit dead def here as a workaround.
23276 MI.addOperand(*MI.getMF(),
23277 MachineOperand::CreateReg(
23278 RISCV::X1, /*isDef*/ true,
23279 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
23280 /*isUndef*/ false, /*isEarlyClobber*/ true));
23281 [[fallthrough]];
23282 case TargetOpcode::STACKMAP:
23283 case TargetOpcode::PATCHPOINT:
23284 if (!Subtarget.is64Bit())
23285 reportFatalUsageError("STACKMAP, PATCHPOINT and STATEPOINT are only "
23286 "supported on 64-bit targets");
23287 return emitPatchPoint(MI, BB);
23288 }
23289}
23290
23291void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
23292 SDNode *Node) const {
23293 // If instruction defines FRM operand, conservatively set it as non-dead to
23294 // express data dependency with FRM users and prevent incorrect instruction
23295 // reordering.
23296 if (auto *FRMDef = MI.findRegisterDefOperand(RISCV::FRM, /*TRI=*/nullptr)) {
23297 FRMDef->setIsDead(false);
23298 return;
23299 }
23300 // Add FRM dependency to any instructions with dynamic rounding mode.
23301 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
23302 if (Idx < 0) {
23303 // Vector pseudos have FRM index indicated by TSFlags.
23304 Idx = RISCVII::getFRMOpNum(MI.getDesc());
23305 if (Idx < 0)
23306 return;
23307 }
23308 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
23309 return;
23310 // If the instruction already reads FRM, don't add another read.
23311 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
23312 return;
23313 MI.addOperand(
23314 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
23315}
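// For example (illustration): an instruction that defines FRM keeps that def
// marked as non-dead; a scalar or vector FP instruction whose rounding-mode
// operand is RISCVFPRndMode::DYN gets an implicit use of FRM appended unless
// it already reads FRM; and instructions with a static rounding mode (e.g.
// RTZ) are left untouched.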
23316
23317void RISCVTargetLowering::analyzeInputArgs(
23318 MachineFunction &MF, CCState &CCInfo,
23319 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
23320 RISCVCCAssignFn Fn) const {
23321 for (const auto &[Idx, In] : enumerate(Ins)) {
23322 MVT ArgVT = In.VT;
23323 ISD::ArgFlagsTy ArgFlags = In.Flags;
23324
23325 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
23326 In.OrigTy)) {
23327 LLVM_DEBUG(dbgs() << "InputArg #" << Idx << " has unhandled type "
23328 << ArgVT << '\n');
23329 llvm_unreachable(nullptr);
23330 }
23331 }
23332}
23333
23334void RISCVTargetLowering::analyzeOutputArgs(
23335 MachineFunction &MF, CCState &CCInfo,
23336 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
23337 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
23338 for (const auto &[Idx, Out] : enumerate(Outs)) {
23339 MVT ArgVT = Out.VT;
23340 ISD::ArgFlagsTy ArgFlags = Out.Flags;
23341
23342 if (Fn(Idx, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, IsRet,
23343 Out.OrigTy)) {
23344 LLVM_DEBUG(dbgs() << "OutputArg #" << Idx << " has unhandled type "
23345 << ArgVT << "\n");
23346 llvm_unreachable(nullptr);
23347 }
23348 }
23349}
23350
23351// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
23352// values.
23353static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
23354 const CCValAssign &VA, const SDLoc &DL,
23355 const RISCVSubtarget &Subtarget) {
23356 if (VA.needsCustom()) {
23357 if (VA.getLocVT().isInteger() &&
23358 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
23359 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
23360 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
23361 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
23362 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
23363 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
23364 llvm_unreachable("Unexpected Custom handling.");
23365 }
23366
23367 switch (VA.getLocInfo()) {
23368 default:
23369 llvm_unreachable("Unexpected CCValAssign::LocInfo");
23370 case CCValAssign::Full:
23371 break;
23372 case CCValAssign::BCvt:
23373 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
23374 break;
23375 }
23376 return Val;
23377}
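// For instance, an f16/bf16 value that was passed in an integer register is
// moved back with FMV_H_X, an f32 passed in an i64 location on RV64 uses
// FMV_W_X_RV64, a fixed-length vector passed in a scalable container is
// extracted with convertFromScalableVector, and a plain BCvt location is
// simply bitcast back to the value type.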
23378
23379// The caller is responsible for loading the full value if the argument is
23380// passed with CCValAssign::Indirect.
23381static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
23382 const CCValAssign &VA, const SDLoc &DL,
23383 const ISD::InputArg &In,
23384 const RISCVTargetLowering &TLI) {
23385 MachineFunction &MF = DAG.getMachineFunction();
23386 MachineRegisterInfo &RegInfo = MF.getRegInfo();
23387 EVT LocVT = VA.getLocVT();
23388 SDValue Val;
23389 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
23390 Register VReg = RegInfo.createVirtualRegister(RC);
23391 RegInfo.addLiveIn(VA.getLocReg(), VReg);
23392 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
23393
23394 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
23395 if (In.isOrigArg()) {
23396 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
23397 if (OrigArg->getType()->isIntegerTy()) {
23398 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
23399 // An input zero extended from i31 can also be considered sign extended.
23400 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
23401 (BitWidth < 32 && In.Flags.isZExt())) {
23402 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
23403 RVFI->addSExt32Register(VReg);
23404 }
23405 }
23406 }
23407
23408 if (VA.getLocInfo() == CCValAssign::Indirect)
23409 return Val;
23410
23411 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
23412}
23413
23414static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
23415 const CCValAssign &VA, const SDLoc &DL,
23416 const RISCVSubtarget &Subtarget) {
23417 EVT LocVT = VA.getLocVT();
23418
23419 if (VA.needsCustom()) {
23420 if (LocVT.isInteger() &&
23421 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
23422 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
23423 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
23424 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
23425 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
23426 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
23427 llvm_unreachable("Unexpected Custom handling.");
23428 }
23429
23430 switch (VA.getLocInfo()) {
23431 default:
23432 llvm_unreachable("Unexpected CCValAssign::LocInfo");
23433 case CCValAssign::Full:
23434 break;
23435 case CCValAssign::BCvt:
23436 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
23437 break;
23438 }
23439 return Val;
23440}
23441
23442// The caller is responsible for loading the full value if the argument is
23443// passed with CCValAssign::Indirect.
23444static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
23445 const CCValAssign &VA, const SDLoc &DL) {
23446 MachineFunction &MF = DAG.getMachineFunction();
23447 MachineFrameInfo &MFI = MF.getFrameInfo();
23448 EVT LocVT = VA.getLocVT();
23449 EVT ValVT = VA.getValVT();
23451 if (VA.getLocInfo() == CCValAssign::Indirect) {
23452 // When the value is a scalable vector, we save the pointer which points to
23453 // the scalable vector value in the stack. The ValVT will be the pointer
23454 // type, instead of the scalable vector type.
23455 ValVT = LocVT;
23456 }
23457 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
23458 /*IsImmutable=*/true);
23459 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
23460 SDValue Val;
23461
23463 switch (VA.getLocInfo()) {
23464 default:
23465 llvm_unreachable("Unexpected CCValAssign::LocInfo");
23466 case CCValAssign::Full:
23468 case CCValAssign::BCvt:
23469 break;
23470 }
23471 Val = DAG.getExtLoad(
23472 ExtType, DL, LocVT, Chain, FIN,
23474 return Val;
23475}
23476
23477static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
23478 const CCValAssign &VA,
23479 const CCValAssign &HiVA,
23480 const SDLoc &DL) {
23481 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
23482 "Unexpected VA");
23483 MachineFunction &MF = DAG.getMachineFunction();
23484 MachineFrameInfo &MFI = MF.getFrameInfo();
23485 MachineRegisterInfo &RegInfo = MF.getRegInfo();
23486
23487 assert(VA.isRegLoc() && "Expected register VA assignment");
23488
23489 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
23490 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
23491 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
23492 SDValue Hi;
23493 if (HiVA.isMemLoc()) {
23494 // Second half of f64 is passed on the stack.
23495 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
23496 /*IsImmutable=*/true);
23497 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
23498 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
23499 MachinePointerInfo::getFixedStack(MF, FI));
23500 } else {
23501 // Second half of f64 is passed in another GPR.
23502 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
23503 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
23504 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
23505 }
23506 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
23507}
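// Illustrative example: under the ilp32 soft-float ABI an f64 argument is
// split into two i32 halves, with the low half arriving in a GPR (say a0) and
// the high half arriving either in the next GPR (a1) or in a 4-byte stack
// slot; the two halves are then recombined with RISCVISD::BuildPairF64.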
23508
23509// Transform physical registers into virtual registers.
23510SDValue RISCVTargetLowering::LowerFormalArguments(
23511 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
23512 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
23513 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
23514
23515 MachineFunction &MF = DAG.getMachineFunction();
23516
23517 switch (CallConv) {
23518 default:
23519 reportFatalUsageError("Unsupported calling convention");
23520 case CallingConv::C:
23521 case CallingConv::Fast:
23524 case CallingConv::GRAAL:
23526#define CC_VLS_CASE(ABI_VLEN) case CallingConv::RISCV_VLSCall_##ABI_VLEN:
23527 CC_VLS_CASE(32)
23528 CC_VLS_CASE(64)
23529 CC_VLS_CASE(128)
23530 CC_VLS_CASE(256)
23531 CC_VLS_CASE(512)
23532 CC_VLS_CASE(1024)
23533 CC_VLS_CASE(2048)
23534 CC_VLS_CASE(4096)
23535 CC_VLS_CASE(8192)
23536 CC_VLS_CASE(16384)
23537 CC_VLS_CASE(32768)
23538 CC_VLS_CASE(65536)
23539#undef CC_VLS_CASE
23540 break;
23541 case CallingConv::GHC:
23542 if (Subtarget.hasStdExtE())
23543 reportFatalUsageError("GHC calling convention is not supported on RVE!");
23544 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
23545 reportFatalUsageError("GHC calling convention requires the (Zfinx/F) and "
23546 "(Zdinx/D) instruction set extensions");
23547 }
23548
23549 const Function &Func = MF.getFunction();
23550 if (Func.hasFnAttribute("interrupt")) {
23551 if (!Func.arg_empty())
23552 reportFatalUsageError(
23553 "Functions with the interrupt attribute cannot have arguments!");
23554
23555 StringRef Kind =
23556 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
23557
23558 constexpr StringLiteral SupportedInterruptKinds[] = {
23559 "machine",
23560 "supervisor",
23561 "rnmi",
23562 "qci-nest",
23563 "qci-nonest",
23564 "SiFive-CLIC-preemptible",
23565 "SiFive-CLIC-stack-swap",
23566 "SiFive-CLIC-preemptible-stack-swap",
23567 };
23568 if (!llvm::is_contained(SupportedInterruptKinds, Kind))
23569 reportFatalUsageError(
23570 "Function interrupt attribute argument not supported!");
23571
23572 if (Kind.starts_with("qci-") && !Subtarget.hasVendorXqciint())
23573 reportFatalUsageError(
23574 "'qci-*' interrupt kinds require Xqciint extension");
23575
23576 if (Kind.starts_with("SiFive-CLIC-") && !Subtarget.hasVendorXSfmclic())
23577 reportFatalUsageError(
23578 "'SiFive-CLIC-*' interrupt kinds require XSfmclic extension");
23579
23580 if (Kind == "rnmi" && !Subtarget.hasStdExtSmrnmi())
23581 reportFatalUsageError("'rnmi' interrupt kind requires Srnmi extension");
23582 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
23583 if (Kind.starts_with("SiFive-CLIC-preemptible") && TFI->hasFP(MF))
23584 reportFatalUsageError("'SiFive-CLIC-preemptible' interrupt kinds cannot "
23585 "have a frame pointer");
23586 }
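// For illustration, the attribute checked above looks roughly like this at
// the IR level (the function name is hypothetical):
//   define void @my_isr() "interrupt"="machine" { ... }
// where "machine" may be replaced by any of the supported kinds listed in
// SupportedInterruptKinds, with the qci-*, SiFive-CLIC-* and rnmi kinds
// additionally gated on the corresponding vendor or standard extensions.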
23587
23588 EVT PtrVT = getPointerTy(DAG.getDataLayout());
23589 MVT XLenVT = Subtarget.getXLenVT();
23590 unsigned XLenInBytes = Subtarget.getXLen() / 8;
23591 // Used with vargs to accumulate store chains.
23592 std::vector<SDValue> OutChains;
23593
23594 // Assign locations to all of the incoming arguments.
23595 SmallVector<CCValAssign, 16> ArgLocs;
23596 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
23597
23598 if (CallConv == CallingConv::GHC)
23599 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
23600 else
23601 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
23602 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
23603 : CC_RISCV);
23604
23605 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
23606 CCValAssign &VA = ArgLocs[i];
23607 SDValue ArgValue;
23608 // Passing f64 on RV32D with a soft float ABI must be handled as a special
23609 // case.
23610 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23611 assert(VA.needsCustom());
23612 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
23613 } else if (VA.isRegLoc())
23614 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
23615 else
23616 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
23617
23618 if (VA.getLocInfo() == CCValAssign::Indirect) {
23619 // If the original argument was split and passed by reference (e.g. i128
23620 // on RV32), we need to load all parts of it here (using the same
23621 // address). Vectors may be partly split to registers and partly to the
23622 // stack, in which case the base address is partly offset and subsequent
23623 // stores are relative to that.
23624 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
23625 MachinePointerInfo()));
23626 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
23627 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
23628 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23629 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
23630 CCValAssign &PartVA = ArgLocs[i + 1];
23631 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
23632 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23633 if (PartVA.getValVT().isScalableVector())
23634 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23635 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
23636 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
23637 MachinePointerInfo()));
23638 ++i;
23639 ++InsIdx;
23640 }
23641 continue;
23642 }
23643 InVals.push_back(ArgValue);
23644 }
23645
23646 if (any_of(ArgLocs,
23647 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
23648 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
23649
23650 if (IsVarArg) {
23651 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
23652 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
23653 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
23654 MachineFrameInfo &MFI = MF.getFrameInfo();
23655 MachineRegisterInfo &RegInfo = MF.getRegInfo();
23656 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
23657
23658 // Size of the vararg save area. For now, the varargs save area is either
23659 // zero or large enough to hold a0-a7.
23660 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
23661 int FI;
23662
23663 // If all registers are allocated, then all varargs must be passed on the
23664 // stack and we don't need to save any argregs.
23665 if (VarArgsSaveSize == 0) {
23666 int VaArgOffset = CCInfo.getStackSize();
23667 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
23668 } else {
23669 int VaArgOffset = -VarArgsSaveSize;
23670 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
23671
23672 // If saving an odd number of registers then create an extra stack slot to
23673 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
23674 // offsets to even-numbered registers remain 2*XLEN-aligned.
23675 if (Idx % 2) {
23676 MFI.CreateFixedObject(
23677 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
23678 VarArgsSaveSize += XLenInBytes;
23679 }
23680
23681 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
23682
23683 // Copy the integer registers that may have been used for passing varargs
23684 // to the vararg save area.
23685 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
23686 const Register Reg = RegInfo.createVirtualRegister(RC);
23687 RegInfo.addLiveIn(ArgRegs[I], Reg);
23688 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
23689 SDValue Store = DAG.getStore(
23690 Chain, DL, ArgValue, FIN,
23691 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
23692 OutChains.push_back(Store);
23693 FIN =
23694 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
23695 }
23696 }
23697
23698 // Record the frame index of the first variable argument
23699 // which is a value necessary to VASTART.
23700 RVFI->setVarArgsFrameIndex(FI);
23701 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
23702 }
23703
23704 // All stores are grouped in one node to keep the sizes of Ins and InVals
23705 // matching. This only happens for vararg functions.
23706 if (!OutChains.empty()) {
23707 OutChains.push_back(Chain);
23708 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
23709 }
23710
23711 return Chain;
23712}
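// Worked example for the vararg save area above (illustrative): on RV64 with
// the standard (non-E) ABI, XLenInBytes == 8 and ArgRegs covers a0-a7. If the
// named arguments consume a0-a2 then Idx == 3, so a3-a7 are spilled and
// VarArgsSaveSize = 8 * (8 - 3) = 40 bytes; because Idx is odd an extra
// 8-byte slot is created to keep the area 2*XLEN aligned, for 48 bytes total.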
23713
23714/// isEligibleForTailCallOptimization - Check whether the call is eligible
23715/// for tail call optimization.
23716/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
23717bool RISCVTargetLowering::isEligibleForTailCallOptimization(
23718 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
23719 const SmallVector<CCValAssign, 16> &ArgLocs) const {
23720
23721 auto CalleeCC = CLI.CallConv;
23722 auto &Outs = CLI.Outs;
23723 auto &Caller = MF.getFunction();
23724 auto CallerCC = Caller.getCallingConv();
23725
23726 // Exception-handling functions need a special set of instructions to
23727 // indicate a return to the hardware. Tail-calling another function would
23728 // probably break this.
23729 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
23730 // should be expanded as new function attributes are introduced.
23731 if (Caller.hasFnAttribute("interrupt"))
23732 return false;
23733
23734 // Do not tail call opt if the stack is used to pass parameters.
23735 if (CCInfo.getStackSize() != 0)
23736 return false;
23737
23738 // Do not tail call opt if any parameters need to be passed indirectly.
23739 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
23740 // passed indirectly. So the address of the value will be passed in a
23741 // register, or if not available, then the address is put on the stack. In
23742 // order to pass indirectly, space on the stack often needs to be allocated
23743 // in order to store the value. In this case the CCInfo.getStackSize() != 0
23744 // check is not enough and we need to check whether any of the CCValAssigns
23745 // in ArgLocs are passed CCValAssign::Indirect.
23746 for (auto &VA : ArgLocs)
23747 if (VA.getLocInfo() == CCValAssign::Indirect)
23748 return false;
23749
23750 // Do not tail call opt if either caller or callee uses struct return
23751 // semantics.
23752 auto IsCallerStructRet = Caller.hasStructRetAttr();
23753 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
23754 if (IsCallerStructRet || IsCalleeStructRet)
23755 return false;
23756
23757 // The callee has to preserve all registers the caller needs to preserve.
23758 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
23759 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
23760 if (CalleeCC != CallerCC) {
23761 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
23762 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
23763 return false;
23764 }
23765
23766 // Byval parameters hand the function a pointer directly into the stack area
23767 // we want to reuse during a tail call. Working around this *is* possible
23768 // but less efficient and uglier in LowerCall.
23769 for (auto &Arg : Outs)
23770 if (Arg.Flags.isByVal())
23771 return false;
23772
23773 return true;
23774}
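// For example (illustration), a call passing an i128 argument on RV32 is
// rejected here: the value is wider than 2*XLEN, so it is passed
// CCValAssign::Indirect through caller-allocated stack memory, and reusing
// the caller's frame for a tail call would invalidate that storage.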
23775
23777 return DAG.getDataLayout().getPrefTypeAlign(
23778 VT.getTypeForEVT(*DAG.getContext()));
23779}
23780
23781// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
23782// and output parameter nodes.
23784 SmallVectorImpl<SDValue> &InVals) const {
23785 SelectionDAG &DAG = CLI.DAG;
23786 SDLoc &DL = CLI.DL;
23788 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
23790 SDValue Chain = CLI.Chain;
23791 SDValue Callee = CLI.Callee;
23792 bool &IsTailCall = CLI.IsTailCall;
23793 CallingConv::ID CallConv = CLI.CallConv;
23794 bool IsVarArg = CLI.IsVarArg;
23795 EVT PtrVT = getPointerTy(DAG.getDataLayout());
23796 MVT XLenVT = Subtarget.getXLenVT();
23797 const CallBase *CB = CLI.CB;
23798
23799 MachineFunction &MF = DAG.getMachineFunction();
23800 MachineFunction::CallSiteInfo CSInfo;
23801
23802 // Set type id for call site info.
23803 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
23804 CSInfo = MachineFunction::CallSiteInfo(*CB);
23805
23806 // Analyze the operands of the call, assigning locations to each operand.
23807 SmallVector<CCValAssign, 16> ArgLocs;
23808 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
23809
23810 if (CallConv == CallingConv::GHC) {
23811 if (Subtarget.hasStdExtE())
23812 reportFatalUsageError("GHC calling convention is not supported on RVE!");
23813 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
23814 } else
23815 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
23816 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
23817 : CC_RISCV);
23818
23819 // Check if it's really possible to do a tail call.
23820 if (IsTailCall)
23821 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
23822
23823 if (IsTailCall)
23824 ++NumTailCalls;
23825 else if (CLI.CB && CLI.CB->isMustTailCall())
23826 reportFatalInternalError("failed to perform tail call elimination on a "
23827 "call site marked musttail");
23828
23829 // Get a count of how many bytes are to be pushed on the stack.
23830 unsigned NumBytes = ArgCCInfo.getStackSize();
23831
23832 // Create local copies for byval args
23833 SmallVector<SDValue, 8> ByValArgs;
23834 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
23835 ISD::ArgFlagsTy Flags = Outs[i].Flags;
23836 if (!Flags.isByVal())
23837 continue;
23838
23839 SDValue Arg = OutVals[i];
23840 unsigned Size = Flags.getByValSize();
23841 Align Alignment = Flags.getNonZeroByValAlign();
23842
23843 int FI =
23844 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
23845 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
23846 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
23847
23848 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
23849 /*IsVolatile=*/false,
23850 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
23852 ByValArgs.push_back(FIPtr);
23853 }
23854
23855 if (!IsTailCall)
23856 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
23857
23858 // Copy argument values to their designated locations.
23859 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
23860 SmallVector<SDValue, 8> MemOpChains;
23861 SDValue StackPtr;
23862 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
23863 ++i, ++OutIdx) {
23864 CCValAssign &VA = ArgLocs[i];
23865 SDValue ArgValue = OutVals[OutIdx];
23866 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
23867
23868 // Handle passing f64 on RV32D with a soft float ABI as a special case.
23869 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
23870 assert(VA.isRegLoc() && "Expected register VA assignment");
23871 assert(VA.needsCustom());
23872 SDValue SplitF64 = DAG.getNode(
23873 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
23874 SDValue Lo = SplitF64.getValue(0);
23875 SDValue Hi = SplitF64.getValue(1);
23876
23877 Register RegLo = VA.getLocReg();
23878 RegsToPass.push_back(std::make_pair(RegLo, Lo));
23879
23880 // Get the CCValAssign for the Hi part.
23881 CCValAssign &HiVA = ArgLocs[++i];
23882
23883 if (HiVA.isMemLoc()) {
23884 // Second half of f64 is passed on the stack.
23885 if (!StackPtr.getNode())
23886 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23887 SDValue Address =
23888 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23889 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
23890 // Emit the store.
23891 MemOpChains.push_back(DAG.getStore(
23892 Chain, DL, Hi, Address,
23893 MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
23894 } else {
23895 // Second half of f64 is passed in another GPR.
23896 Register RegHigh = HiVA.getLocReg();
23897 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
23898 }
23899 continue;
23900 }
23901
23902 // Promote the value if needed.
23903 // For now, only handle fully promoted and indirect arguments.
23904 if (VA.getLocInfo() == CCValAssign::Indirect) {
23905 // Store the argument in a stack slot and pass its address.
23906 Align StackAlign =
23907 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
23908 getPrefTypeAlign(ArgValue.getValueType(), DAG));
23909 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
23910 // If the original argument was split (e.g. i128), we need
23911 // to store the required parts of it here (and pass just one address).
23912 // Vectors may be partly split to registers and partly to the stack, in
23913 // which case the base address is partly offset and subsequent stores are
23914 // relative to that.
23915 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
23916 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
23917 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
23918 // Calculate the total size to store. We don't have access to what we're
23919 // actually storing other than performing the loop and collecting the
23920 // info.
23921 SmallVector<std::pair<SDValue, SDValue>> Parts;
23922 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
23923 SDValue PartValue = OutVals[OutIdx + 1];
23924 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
23925 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
23926 EVT PartVT = PartValue.getValueType();
23927 if (PartVT.isScalableVector())
23928 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
23929 StoredSize += PartVT.getStoreSize();
23930 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
23931 Parts.push_back(std::make_pair(PartValue, Offset));
23932 ++i;
23933 ++OutIdx;
23934 }
23935 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
23936 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
23937 MemOpChains.push_back(
23938 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
23939 MachinePointerInfo::getFixedStack(MF, FI)));
23940 for (const auto &Part : Parts) {
23941 SDValue PartValue = Part.first;
23942 SDValue PartOffset = Part.second;
23943 SDValue Address =
23944 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
23945 MemOpChains.push_back(
23946 DAG.getStore(Chain, DL, PartValue, Address,
23947 MachinePointerInfo::getFixedStack(MF, FI)));
23948 }
23949 ArgValue = SpillSlot;
23950 } else {
23951 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
23952 }
23953
23954 // Use local copy if it is a byval arg.
23955 if (Flags.isByVal())
23956 ArgValue = ByValArgs[j++];
23957
23958 if (VA.isRegLoc()) {
23959 // Queue up the argument copies and emit them at the end.
23960 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
23961
23962 const TargetOptions &Options = DAG.getTarget().Options;
23963 if (Options.EmitCallSiteInfo)
23964 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
23965 } else {
23966 assert(VA.isMemLoc() && "Argument not register or memory");
23967 assert(!IsTailCall && "Tail call not allowed if stack is used "
23968 "for passing parameters");
23969
23970 // Work out the address of the stack slot.
23971 if (!StackPtr.getNode())
23972 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23973 SDValue Address =
23974 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23975 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
23976
23977 // Emit the store.
23978 MemOpChains.push_back(
23979 DAG.getStore(Chain, DL, ArgValue, Address,
23980 MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
23981 }
23982 }
23983
23984 // Join the stores, which are independent of one another.
23985 if (!MemOpChains.empty())
23986 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
23987
23988 SDValue Glue;
23989
23990 // Build a sequence of copy-to-reg nodes, chained and glued together.
23991 for (auto &Reg : RegsToPass) {
23992 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
23993 Glue = Chain.getValue(1);
23994 }
23995
23996 // Validate that none of the argument registers have been marked as
23997 // reserved, if so report an error. Do the same for the return address if this
23998 // is not a tailcall.
23999 validateCCReservedRegs(RegsToPass, MF);
24000 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
24001 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
24002 MF.getFunction(),
24003 "Return address register required, but has been reserved."});
24004
24005 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
24006 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
24007 // split it and then direct call can be matched by PseudoCALL.
24008 bool CalleeIsLargeExternalSymbol = false;
24009 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
24010 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
24011 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
24012 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
24013 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
24014 CalleeIsLargeExternalSymbol = true;
24015 }
24016 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
24017 const GlobalValue *GV = S->getGlobal();
24018 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
24019 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
24020 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
24021 }
24022
24023 // The first call operand is the chain and the second is the target address.
24024 SmallVector<SDValue, 8> Ops;
24025 Ops.push_back(Chain);
24026 Ops.push_back(Callee);
24027
24028 // Add argument registers to the end of the list so that they are
24029 // known live into the call.
24030 for (auto &Reg : RegsToPass)
24031 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
24032
24033 // Add a register mask operand representing the call-preserved registers.
24034 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
24035 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
24036 assert(Mask && "Missing call preserved mask for calling convention");
24037 Ops.push_back(DAG.getRegisterMask(Mask));
24038
24039 // Glue the call to the argument copies, if any.
24040 if (Glue.getNode())
24041 Ops.push_back(Glue);
24042
24043 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
24044 "Unexpected CFI type for a direct call");
24045
24046 // Emit the call.
24047 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
24048
24049 // Use a software-guarded branch for large code model non-indirect calls.
24050 // A tail call to an external symbol will have a null CLI.CB, so we need
24051 // another way to determine the call site type.
24052 bool NeedSWGuarded = false;
24053 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
24054 Subtarget.hasStdExtZicfilp() &&
24055 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
24056 NeedSWGuarded = true;
24057
24058 if (IsTailCall) {
24059 MF.getFrameInfo().setHasTailCall();
24060 unsigned CallOpc =
24061 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
24062 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
24063 if (CLI.CFIType)
24064 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
24065 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
24066 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
24067 return Ret;
24068 }
24069
24070 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
24071 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
24072 if (CLI.CFIType)
24073 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
24074
24075 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
24076 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
24077 Glue = Chain.getValue(1);
24078
24079 // Mark the end of the call, which is glued to the call itself.
24080 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
24081 Glue = Chain.getValue(1);
24082
24083 // Assign locations to each value returned by this call.
24084 SmallVector<CCValAssign, 16> RVLocs;
24085 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
24086 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
24087
24088 // Copy all of the result registers out of their specified physreg.
24089 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
24090 auto &VA = RVLocs[i];
24091 // Copy the value out
24092 SDValue RetValue =
24093 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
24094 // Glue the RetValue to the end of the call sequence
24095 Chain = RetValue.getValue(1);
24096 Glue = RetValue.getValue(2);
24097
24098 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
24099 assert(VA.needsCustom());
24100 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
24101 MVT::i32, Glue);
24102 Chain = RetValue2.getValue(1);
24103 Glue = RetValue2.getValue(2);
24104 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
24105 RetValue2);
24106 } else
24107 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
24108
24109 InVals.push_back(RetValue);
24110 }
24111
24112 return Chain;
24113}
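// Overall shape of the DAG built above for a non-tail call (sketch):
//   callseq_start
//     -> CopyToReg for every register argument (glued together)
//     -> RISCVISD::CALL or SW_GUARDED_CALL (chain, callee, arg regs, regmask)
//   callseq_end
//     -> CopyFromReg for each location in RVLocs, with the f64-on-RV32
//        soft-float case reassembled via RISCVISD::BuildPairF64.
// Tail calls instead emit RISCVISD::TAIL or SW_GUARDED_TAIL and return the
// call node directly.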
24114
24115bool RISCVTargetLowering::CanLowerReturn(
24116 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
24117 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
24118 const Type *RetTy) const {
24119 SmallVector<CCValAssign, 16> RVLocs;
24120 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
24121
24122 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
24123 MVT VT = Outs[i].VT;
24124 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
24125 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
24126 /*IsRet=*/true, Outs[i].OrigTy))
24127 return false;
24128 }
24129 return true;
24130}
24131
24132SDValue
24133RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
24134 bool IsVarArg,
24135 const SmallVectorImpl<ISD::OutputArg> &Outs,
24136 const SmallVectorImpl<SDValue> &OutVals,
24137 const SDLoc &DL, SelectionDAG &DAG) const {
24138 MachineFunction &MF = DAG.getMachineFunction();
24139
24140 // Stores the assignment of the return value to a location.
24141 SmallVector<CCValAssign, 16> RVLocs;
24142
24143 // Info about the registers and stack slot.
24144 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
24145 *DAG.getContext());
24146
24147 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
24148 nullptr, CC_RISCV);
24149
24150 if (CallConv == CallingConv::GHC && !RVLocs.empty())
24151 reportFatalUsageError("GHC functions return void only");
24152
24153 SDValue Glue;
24154 SmallVector<SDValue, 4> RetOps(1, Chain);
24155
24156 // Copy the result values into the output registers.
24157 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
24158 SDValue Val = OutVals[OutIdx];
24159 CCValAssign &VA = RVLocs[i];
24160 assert(VA.isRegLoc() && "Can only return in registers!");
24161
24162 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
24163 // Handle returning f64 on RV32D with a soft float ABI.
24164 assert(VA.isRegLoc() && "Expected return via registers");
24165 assert(VA.needsCustom());
24166 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
24167 DAG.getVTList(MVT::i32, MVT::i32), Val);
24168 SDValue Lo = SplitF64.getValue(0);
24169 SDValue Hi = SplitF64.getValue(1);
24170 Register RegLo = VA.getLocReg();
24171 Register RegHi = RVLocs[++i].getLocReg();
24172
24173 if (Subtarget.isRegisterReservedByUser(RegLo) ||
24174 Subtarget.isRegisterReservedByUser(RegHi))
24175 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
24176 MF.getFunction(),
24177 "Return value register required, but has been reserved."});
24178
24179 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
24180 Glue = Chain.getValue(1);
24181 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
24182 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
24183 Glue = Chain.getValue(1);
24184 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
24185 } else {
24186 // Handle a 'normal' return.
24187 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
24188 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
24189
24190 if (Subtarget.isRegisterReservedByUser(VA.getLocReg()))
24191 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
24192 MF.getFunction(),
24193 "Return value register required, but has been reserved."});
24194
24195 // Guarantee that all emitted copies are stuck together.
24196 Glue = Chain.getValue(1);
24197 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
24198 }
24199 }
24200
24201 RetOps[0] = Chain; // Update chain.
24202
24203 // Add the glue node if we have it.
24204 if (Glue.getNode()) {
24205 RetOps.push_back(Glue);
24206 }
24207
24208 if (any_of(RVLocs,
24209 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
24210 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
24211
24212 unsigned RetOpc = RISCVISD::RET_GLUE;
24213 // Interrupt service routines use different return instructions.
24214 const Function &Func = DAG.getMachineFunction().getFunction();
24215 if (Func.hasFnAttribute("interrupt")) {
24216 if (!Func.getReturnType()->isVoidTy())
24217 reportFatalUsageError(
24218 "Functions with the interrupt attribute must have void return type!");
24219
24221 StringRef Kind =
24222 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
24223
24224 if (Kind == "supervisor")
24225 RetOpc = RISCVISD::SRET_GLUE;
24226 else if (Kind == "rnmi") {
24227 assert(Subtarget.hasFeature(RISCV::FeatureStdExtSmrnmi) &&
24228 "Need Smrnmi extension for rnmi");
24229 RetOpc = RISCVISD::MNRET_GLUE;
24230 } else if (Kind == "qci-nest" || Kind == "qci-nonest") {
24231 assert(Subtarget.hasFeature(RISCV::FeatureVendorXqciint) &&
24232 "Need Xqciint for qci-(no)nest");
24233 RetOpc = RISCVISD::QC_C_MILEAVERET_GLUE;
24234 } else
24235 RetOpc = RISCVISD::MRET_GLUE;
24236 }
24237
24238 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
24239}
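// Return opcode selection for interrupt handlers above, summarised:
//   "supervisor"                 -> RISCVISD::SRET_GLUE
//   "rnmi"                       -> RISCVISD::MNRET_GLUE
//   "qci-nest" / "qci-nonest"    -> RISCVISD::QC_C_MILEAVERET_GLUE
//   any other interrupt kind     -> RISCVISD::MRET_GLUE
// Non-interrupt functions return through the ordinary RISCVISD::RET_GLUE.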
24240
24241void RISCVTargetLowering::validateCCReservedRegs(
24242 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
24243 MachineFunction &MF) const {
24244 const Function &F = MF.getFunction();
24245
24246 if (llvm::any_of(Regs, [this](auto Reg) {
24247 return Subtarget.isRegisterReservedByUser(Reg.first);
24248 }))
24249 F.getContext().diagnose(DiagnosticInfoUnsupported{
24250 F, "Argument register required, but has been reserved."});
24251}
24252
24253// Check if the result of the node is only used as a return value, as
24254// otherwise we can't perform a tail-call.
24255bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
24256 if (N->getNumValues() != 1)
24257 return false;
24258 if (!N->hasNUsesOfValue(1, 0))
24259 return false;
24260
24261 SDNode *Copy = *N->user_begin();
24262
24263 if (Copy->getOpcode() == ISD::BITCAST) {
24264 return isUsedByReturnOnly(Copy, Chain);
24265 }
24266
24267 // TODO: Handle additional opcodes in order to support tail-calling libcalls
24268 // with soft float ABIs.
24269 if (Copy->getOpcode() != ISD::CopyToReg) {
24270 return false;
24271 }
24272
24273 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
24274 // isn't safe to perform a tail call.
24275 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
24276 return false;
24277
24278 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
24279 bool HasRet = false;
24280 for (SDNode *Node : Copy->users()) {
24281 if (Node->getOpcode() != RISCVISD::RET_GLUE)
24282 return false;
24283 HasRet = true;
24284 }
24285 if (!HasRet)
24286 return false;
24287
24288 Chain = Copy->getOperand(0);
24289 return true;
24290}
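// The accepted DAG shape is essentially (sketch):
//   N -> [optional BITCAST] -> CopyToReg (with no glue operand) -> RET_GLUE
// where the copy feeds nothing but RET_GLUE nodes; Chain is then rewritten to
// the CopyToReg's input chain so the libcall can be emitted as a tail call.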
24291
24292bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
24293 return CI->isTailCall();
24294}
24295
24296/// getConstraintType - Given a constraint letter, return the type of
24297/// constraint it is for this target.
24298RISCVTargetLowering::ConstraintType
24299RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
24300 if (Constraint.size() == 1) {
24301 switch (Constraint[0]) {
24302 default:
24303 break;
24304 case 'f':
24305 case 'R':
24306 return C_RegisterClass;
24307 case 'I':
24308 case 'J':
24309 case 'K':
24310 return C_Immediate;
24311 case 'A':
24312 return C_Memory;
24313 case 's':
24314 case 'S': // A symbolic address
24315 return C_Other;
24316 }
24317 } else {
24318 if (Constraint == "vr" || Constraint == "vd" || Constraint == "vm")
24319 return C_RegisterClass;
24320 if (Constraint == "cr" || Constraint == "cR" || Constraint == "cf")
24321 return C_RegisterClass;
24322 }
24323 return TargetLowering::getConstraintType(Constraint);
24324}
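// For illustration, typical uses of these constraints from C inline assembly
// might look like the following (variable names and types are assumed, and
// the relevant extensions must be enabled):
//   asm ("addi %0, %1, %2"    : "=r"(out) : "r"(in), "I"(12));    // imm12
//   asm ("fadd.s %0, %1, %2"  : "=f"(fd)  : "f"(fa), "f"(fb));    // FP regs
//   asm ("vadd.vv %0, %1, %2" : "=vr"(vd) : "vr"(va), "vr"(vb));  // vectors
// 'A' designates an address held in a register, and 's'/'S' accept symbolic
// addresses.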
24325
24326std::pair<unsigned, const TargetRegisterClass *>
24327RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
24328 StringRef Constraint,
24329 MVT VT) const {
24330 // First, see if this is a constraint that directly corresponds to a RISC-V
24331 // register class.
24332 if (Constraint.size() == 1) {
24333 switch (Constraint[0]) {
24334 case 'r':
24335 // TODO: Support fixed vectors up to XLen for P extension?
24336 if (VT.isVector())
24337 break;
24338 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
24339 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
24340 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
24341 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
24342 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
24343 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
24344 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
24345 case 'f':
24346 if (VT == MVT::f16) {
24347 if (Subtarget.hasStdExtZfhmin())
24348 return std::make_pair(0U, &RISCV::FPR16RegClass);
24349 if (Subtarget.hasStdExtZhinxmin())
24350 return std::make_pair(0U, &RISCV::GPRF16NoX0RegClass);
24351 } else if (VT == MVT::f32) {
24352 if (Subtarget.hasStdExtF())
24353 return std::make_pair(0U, &RISCV::FPR32RegClass);
24354 if (Subtarget.hasStdExtZfinx())
24355 return std::make_pair(0U, &RISCV::GPRF32NoX0RegClass);
24356 } else if (VT == MVT::f64) {
24357 if (Subtarget.hasStdExtD())
24358 return std::make_pair(0U, &RISCV::FPR64RegClass);
24359 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
24360 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
24361 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
24362 return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
24363 }
24364 break;
24365 case 'R':
24366 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
24367 (VT == MVT::i128 && Subtarget.is64Bit()))
24368 return std::make_pair(0U, &RISCV::GPRPairNoX0RegClass);
24369 break;
24370 default:
24371 break;
24372 }
24373 } else if (Constraint == "vr") {
24374 // Check VM and fractional LMUL first so that those types will use that
24375 // class instead of VR.
24376 for (const auto *RC :
24377 {&RISCV::VMRegClass, &RISCV::VRMF8RegClass, &RISCV::VRMF4RegClass,
24378 &RISCV::VRMF2RegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
24379 &RISCV::VRM4RegClass, &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass,
24380 &RISCV::VRN3M1RegClass, &RISCV::VRN4M1RegClass,
24381 &RISCV::VRN5M1RegClass, &RISCV::VRN6M1RegClass,
24382 &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass,
24383 &RISCV::VRN2M2RegClass, &RISCV::VRN3M2RegClass,
24384 &RISCV::VRN4M2RegClass, &RISCV::VRN2M4RegClass}) {
24385 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
24386 return std::make_pair(0U, RC);
24387
24388 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
24389 MVT ContainerVT = getContainerForFixedLengthVector(VT);
24390 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
24391 return std::make_pair(0U, RC);
24392 }
24393 }
24394 } else if (Constraint == "vd") {
24395 // Check VMNoV0 and fractional LMUL first so that those types will use that
24396 // class instead of VRNoV0.
24397 for (const auto *RC :
24398 {&RISCV::VMNoV0RegClass, &RISCV::VRMF8NoV0RegClass,
24399 &RISCV::VRMF4NoV0RegClass, &RISCV::VRMF2NoV0RegClass,
24400 &RISCV::VRNoV0RegClass, &RISCV::VRM2NoV0RegClass,
24401 &RISCV::VRM4NoV0RegClass, &RISCV::VRM8NoV0RegClass,
24402 &RISCV::VRN2M1NoV0RegClass, &RISCV::VRN3M1NoV0RegClass,
24403 &RISCV::VRN4M1NoV0RegClass, &RISCV::VRN5M1NoV0RegClass,
24404 &RISCV::VRN6M1NoV0RegClass, &RISCV::VRN7M1NoV0RegClass,
24405 &RISCV::VRN8M1NoV0RegClass, &RISCV::VRN2M2NoV0RegClass,
24406 &RISCV::VRN3M2NoV0RegClass, &RISCV::VRN4M2NoV0RegClass,
24407 &RISCV::VRN2M4NoV0RegClass}) {
24408 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
24409 return std::make_pair(0U, RC);
24410
24411 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
24412 MVT ContainerVT = getContainerForFixedLengthVector(VT);
24413 if (TRI->isTypeLegalForClass(*RC, ContainerVT))
24414 return std::make_pair(0U, RC);
24415 }
24416 }
24417 } else if (Constraint == "vm") {
24418 if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
24419 return std::make_pair(0U, &RISCV::VMV0RegClass);
24420
24421 if (VT.isFixedLengthVector() && useRVVForFixedLengthVectorVT(VT)) {
24422 MVT ContainerVT = getContainerForFixedLengthVector(VT);
24423 // VT here might be coerced to a vector with i8 elements, so we need to
24424 // check whether this is an M1 register here instead of checking VMV0RegClass.
24425 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, ContainerVT))
24426 return std::make_pair(0U, &RISCV::VMV0RegClass);
24427 }
24428 } else if (Constraint == "cr") {
24429 if (VT == MVT::f16 && Subtarget.hasStdExtZhinxmin())
24430 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
24431 if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
24432 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
24433 if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
24434 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
24435 if (!VT.isVector())
24436 return std::make_pair(0U, &RISCV::GPRCRegClass);
24437 } else if (Constraint == "cR") {
24438 if (((VT == MVT::i64 || VT == MVT::f64) && !Subtarget.is64Bit()) ||
24439 (VT == MVT::i128 && Subtarget.is64Bit()))
24440 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
24441 } else if (Constraint == "cf") {
24442 if (VT == MVT::f16) {
24443 if (Subtarget.hasStdExtZfhmin())
24444 return std::make_pair(0U, &RISCV::FPR16CRegClass);
24445 if (Subtarget.hasStdExtZhinxmin())
24446 return std::make_pair(0U, &RISCV::GPRF16CRegClass);
24447 } else if (VT == MVT::f32) {
24448 if (Subtarget.hasStdExtF())
24449 return std::make_pair(0U, &RISCV::FPR32CRegClass);
24450 if (Subtarget.hasStdExtZfinx())
24451 return std::make_pair(0U, &RISCV::GPRF32CRegClass);
24452 } else if (VT == MVT::f64) {
24453 if (Subtarget.hasStdExtD())
24454 return std::make_pair(0U, &RISCV::FPR64CRegClass);
24455 if (Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
24456 return std::make_pair(0U, &RISCV::GPRPairCRegClass);
24457 if (Subtarget.hasStdExtZdinx() && Subtarget.is64Bit())
24458 return std::make_pair(0U, &RISCV::GPRCRegClass);
24459 }
24460 }
24461
24462 // Clang will correctly decode the usage of register name aliases into their
24463 // official names. However, other frontends like `rustc` do not. This allows
24464 // users of these frontends to use the ABI names for registers in LLVM-style
24465 // register constraints.
24466 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
24467 .Case("{zero}", RISCV::X0)
24468 .Case("{ra}", RISCV::X1)
24469 .Case("{sp}", RISCV::X2)
24470 .Case("{gp}", RISCV::X3)
24471 .Case("{tp}", RISCV::X4)
24472 .Case("{t0}", RISCV::X5)
24473 .Case("{t1}", RISCV::X6)
24474 .Case("{t2}", RISCV::X7)
24475 .Cases({"{s0}", "{fp}"}, RISCV::X8)
24476 .Case("{s1}", RISCV::X9)
24477 .Case("{a0}", RISCV::X10)
24478 .Case("{a1}", RISCV::X11)
24479 .Case("{a2}", RISCV::X12)
24480 .Case("{a3}", RISCV::X13)
24481 .Case("{a4}", RISCV::X14)
24482 .Case("{a5}", RISCV::X15)
24483 .Case("{a6}", RISCV::X16)
24484 .Case("{a7}", RISCV::X17)
24485 .Case("{s2}", RISCV::X18)
24486 .Case("{s3}", RISCV::X19)
24487 .Case("{s4}", RISCV::X20)
24488 .Case("{s5}", RISCV::X21)
24489 .Case("{s6}", RISCV::X22)
24490 .Case("{s7}", RISCV::X23)
24491 .Case("{s8}", RISCV::X24)
24492 .Case("{s9}", RISCV::X25)
24493 .Case("{s10}", RISCV::X26)
24494 .Case("{s11}", RISCV::X27)
24495 .Case("{t3}", RISCV::X28)
24496 .Case("{t4}", RISCV::X29)
24497 .Case("{t5}", RISCV::X30)
24498 .Case("{t6}", RISCV::X31)
24499 .Default(RISCV::NoRegister);
24500 if (XRegFromAlias != RISCV::NoRegister)
24501 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
24502
24503 // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
24504 // TableGen record rather than the AsmName to choose registers for InlineAsm
24505 // constraints, plus we want to match those names to the widest floating point
24506 // register type available, manually select floating point registers here.
24507 //
24508 // The second case is the ABI name of the register, so that frontends can also
24509 // use the ABI names in register constraint lists.
24510 if (Subtarget.hasStdExtF()) {
24511 unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
24512 .Cases({"{f0}", "{ft0}"}, RISCV::F0_F)
24513 .Cases({"{f1}", "{ft1}"}, RISCV::F1_F)
24514 .Cases({"{f2}", "{ft2}"}, RISCV::F2_F)
24515 .Cases({"{f3}", "{ft3}"}, RISCV::F3_F)
24516 .Cases({"{f4}", "{ft4}"}, RISCV::F4_F)
24517 .Cases({"{f5}", "{ft5}"}, RISCV::F5_F)
24518 .Cases({"{f6}", "{ft6}"}, RISCV::F6_F)
24519 .Cases({"{f7}", "{ft7}"}, RISCV::F7_F)
24520 .Cases({"{f8}", "{fs0}"}, RISCV::F8_F)
24521 .Cases({"{f9}", "{fs1}"}, RISCV::F9_F)
24522 .Cases({"{f10}", "{fa0}"}, RISCV::F10_F)
24523 .Cases({"{f11}", "{fa1}"}, RISCV::F11_F)
24524 .Cases({"{f12}", "{fa2}"}, RISCV::F12_F)
24525 .Cases({"{f13}", "{fa3}"}, RISCV::F13_F)
24526 .Cases({"{f14}", "{fa4}"}, RISCV::F14_F)
24527 .Cases({"{f15}", "{fa5}"}, RISCV::F15_F)
24528 .Cases({"{f16}", "{fa6}"}, RISCV::F16_F)
24529 .Cases({"{f17}", "{fa7}"}, RISCV::F17_F)
24530 .Cases({"{f18}", "{fs2}"}, RISCV::F18_F)
24531 .Cases({"{f19}", "{fs3}"}, RISCV::F19_F)
24532 .Cases({"{f20}", "{fs4}"}, RISCV::F20_F)
24533 .Cases({"{f21}", "{fs5}"}, RISCV::F21_F)
24534 .Cases({"{f22}", "{fs6}"}, RISCV::F22_F)
24535 .Cases({"{f23}", "{fs7}"}, RISCV::F23_F)
24536 .Cases({"{f24}", "{fs8}"}, RISCV::F24_F)
24537 .Cases({"{f25}", "{fs9}"}, RISCV::F25_F)
24538 .Cases({"{f26}", "{fs10}"}, RISCV::F26_F)
24539 .Cases({"{f27}", "{fs11}"}, RISCV::F27_F)
24540 .Cases({"{f28}", "{ft8}"}, RISCV::F28_F)
24541 .Cases({"{f29}", "{ft9}"}, RISCV::F29_F)
24542 .Cases({"{f30}", "{ft10}"}, RISCV::F30_F)
24543 .Cases({"{f31}", "{ft11}"}, RISCV::F31_F)
24544 .Default(RISCV::NoRegister);
24545 if (FReg != RISCV::NoRegister) {
24546 assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
24547 if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
24548 unsigned RegNo = FReg - RISCV::F0_F;
24549 unsigned DReg = RISCV::F0_D + RegNo;
24550 return std::make_pair(DReg, &RISCV::FPR64RegClass);
24551 }
24552 if (VT == MVT::f32 || VT == MVT::Other)
24553 return std::make_pair(FReg, &RISCV::FPR32RegClass);
24554 if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) {
24555 unsigned RegNo = FReg - RISCV::F0_F;
24556 unsigned HReg = RISCV::F0_H + RegNo;
24557 return std::make_pair(HReg, &RISCV::FPR16RegClass);
24558 }
24559 }
24560 }
24561
24562 if (Subtarget.hasVInstructions()) {
24563 Register VReg = StringSwitch<Register>(Constraint.lower())
24564 .Case("{v0}", RISCV::V0)
24565 .Case("{v1}", RISCV::V1)
24566 .Case("{v2}", RISCV::V2)
24567 .Case("{v3}", RISCV::V3)
24568 .Case("{v4}", RISCV::V4)
24569 .Case("{v5}", RISCV::V5)
24570 .Case("{v6}", RISCV::V6)
24571 .Case("{v7}", RISCV::V7)
24572 .Case("{v8}", RISCV::V8)
24573 .Case("{v9}", RISCV::V9)
24574 .Case("{v10}", RISCV::V10)
24575 .Case("{v11}", RISCV::V11)
24576 .Case("{v12}", RISCV::V12)
24577 .Case("{v13}", RISCV::V13)
24578 .Case("{v14}", RISCV::V14)
24579 .Case("{v15}", RISCV::V15)
24580 .Case("{v16}", RISCV::V16)
24581 .Case("{v17}", RISCV::V17)
24582 .Case("{v18}", RISCV::V18)
24583 .Case("{v19}", RISCV::V19)
24584 .Case("{v20}", RISCV::V20)
24585 .Case("{v21}", RISCV::V21)
24586 .Case("{v22}", RISCV::V22)
24587 .Case("{v23}", RISCV::V23)
24588 .Case("{v24}", RISCV::V24)
24589 .Case("{v25}", RISCV::V25)
24590 .Case("{v26}", RISCV::V26)
24591 .Case("{v27}", RISCV::V27)
24592 .Case("{v28}", RISCV::V28)
24593 .Case("{v29}", RISCV::V29)
24594 .Case("{v30}", RISCV::V30)
24595 .Case("{v31}", RISCV::V31)
24596 .Default(RISCV::NoRegister);
24597 if (VReg != RISCV::NoRegister) {
24598 if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
24599 return std::make_pair(VReg, &RISCV::VMRegClass);
24600 if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
24601 return std::make_pair(VReg, &RISCV::VRRegClass);
24602 for (const auto *RC :
24603 {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
24604 if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
24605 VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
24606 return std::make_pair(VReg, RC);
24607 }
24608 }
24609 }
24610 }
24611
24612 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
24613}
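// Example (illustrative, not part of the upstream source): with the tables
// above, an explicit register constraint in LLVM IR such as
//
//   %res = call i32 asm "addi $0, $1, 1", "={a0},{a1}"(i32 %x)
//
// maps "{a0}"/"{a1}" to X10/X11 in the GPR class, a floating-point constraint
// such as "{fa0}" (or the architectural name "{f10}") is widened to
// F10_D/F10_F/F10_H depending on the requested value type and the available
// FP extensions, and a vector constraint such as "{v8}" is promoted to the
// matching VRM2/VRM4/VRM8 super-register when the type needs LMUL > 1.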
24614
24617 // Currently only support length 1 constraints.
24618 if (ConstraintCode.size() == 1) {
24619 switch (ConstraintCode[0]) {
24620 case 'A':
24622 default:
24623 break;
24624 }
24625 }
24626
24627 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
24628}
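// Example (illustrative, not part of the upstream source): the 'A' machine
// constraint denotes a memory operand whose address is held in a single
// general-purpose register, as required by the AMO and LR/SC instructions,
// e.g. from C:
//
//   asm volatile("amoadd.w %0, %2, %1"
//                : "=r"(old), "+A"(*ptr)
//                : "r"(inc));
//
// The front end lowers this to an inline-asm memory operand that this hook
// maps to InlineAsm::ConstraintCode::A.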
24629
24631 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
24632 SelectionDAG &DAG) const {
24633 // Currently only support length 1 constraints.
24634 if (Constraint.size() == 1) {
24635 switch (Constraint[0]) {
24636 case 'I':
24637 // Validate & create a 12-bit signed immediate operand.
24638 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
24639 uint64_t CVal = C->getSExtValue();
24640 if (isInt<12>(CVal))
24641 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
24642 Subtarget.getXLenVT()));
24643 }
24644 return;
24645 case 'J':
24646 // Validate & create an integer zero operand.
24647 if (isNullConstant(Op))
24648 Ops.push_back(
24649 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
24650 return;
24651 case 'K':
24652 // Validate & create a 5-bit unsigned immediate operand.
24653 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
24654 uint64_t CVal = C->getZExtValue();
24655 if (isUInt<5>(CVal))
24656 Ops.push_back(
24657 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
24658 }
24659 return;
24660 case 'S':
24662 return;
24663 default:
24664 break;
24665 }
24666 }
24668}
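// Example (illustrative, not part of the upstream source): these letters match
// the classic GCC RISC-V immediate constraints, e.g.
//
//   asm("addi %0, %1, %2" : "=r"(r) : "r"(x), "I"(42));   // 42 fits in simm12
//
// while 'J' accepts only the constant zero and 'K' a 5-bit unsigned immediate
// (as used by csrrwi). An out-of-range constant such as "I"(4096) is rejected
// here simply by not pushing an operand, so the caller reports an invalid
// inline-asm operand.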
24669
24671 Instruction *Inst,
24672 AtomicOrdering Ord) const {
24673 if (Subtarget.hasStdExtZtso()) {
24675 return Builder.CreateFence(Ord);
24676 return nullptr;
24677 }
24678
24680 return Builder.CreateFence(Ord);
24681 if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
24682 return Builder.CreateFence(AtomicOrdering::Release);
24683 return nullptr;
24684}
24685
24687 Instruction *Inst,
24688 AtomicOrdering Ord) const {
24689 if (Subtarget.hasStdExtZtso()) {
24691 return Builder.CreateFence(Ord);
24692 return nullptr;
24693 }
24694
24695 if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
24696 return Builder.CreateFence(AtomicOrdering::Acquire);
24697 if (Subtarget.enableTrailingSeqCstFence() && isa<StoreInst>(Inst) &&
24699 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
24700 return nullptr;
24701}
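// Example (illustrative sketch, assuming the default WMO fence-based mapping
// rather than Zalasr or Ztso): a seq_cst atomic load gets a leading
// "fence rw, rw" and a trailing "fence r, rw" around a plain lw, while a
// release atomic store only gets a leading "fence rw, w":
//
//   fence rw, rw              fence rw, w
//   lw    a0, 0(a1)           sw    a0, 0(a1)
//   fence r, rw
//
// With Ztso, the early returns above mean only seq_cst accesses receive any
// fence at all.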
24702
24705 // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
24706 // point operations can't be used in an lr/sc sequence without breaking the
24707 // forward-progress guarantee.
24708 if (AI->isFloatingPointOperation() ||
24714
24715 // Don't expand forced atomics, we want to have __sync libcalls instead.
24716 if (Subtarget.hasForcedAtomics())
24718
24719 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
24720 if (AI->getOperation() == AtomicRMWInst::Nand) {
24721 if (Subtarget.hasStdExtZacas() &&
24722 (Size >= 32 || Subtarget.hasStdExtZabha()))
24724 if (Size < 32)
24726 }
24727
24728 if (Size < 32 && !Subtarget.hasStdExtZabha())
24730
24732}
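// Example (illustrative, not part of the upstream source): without Zabha, an
// i8 "atomicrmw add" is reported as needing the masked-intrinsic expansion,
// so the AtomicExpand pass rewrites it into a word-sized LR/SC loop on the
// aligned i32 containing the byte, via the riscv_masked_atomicrmw_add
// intrinsic emitted by emitMaskedAtomicRMWIntrinsic below. With Zabha, the
// byte-sized amoadd.b can be selected directly and no expansion is requested.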
24733
24734static Intrinsic::ID
24736 switch (BinOp) {
24737 default:
24738 llvm_unreachable("Unexpected AtomicRMW BinOp");
24740 return Intrinsic::riscv_masked_atomicrmw_xchg;
24741 case AtomicRMWInst::Add:
24742 return Intrinsic::riscv_masked_atomicrmw_add;
24743 case AtomicRMWInst::Sub:
24744 return Intrinsic::riscv_masked_atomicrmw_sub;
24746 return Intrinsic::riscv_masked_atomicrmw_nand;
24747 case AtomicRMWInst::Max:
24748 return Intrinsic::riscv_masked_atomicrmw_max;
24749 case AtomicRMWInst::Min:
24750 return Intrinsic::riscv_masked_atomicrmw_min;
24752 return Intrinsic::riscv_masked_atomicrmw_umax;
24754 return Intrinsic::riscv_masked_atomicrmw_umin;
24755 }
24756}
24757
24759 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
24760 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
24761 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
24762 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
24763 // mask, as this produces better code than the LR/SC loop emitted by
24764 // int_riscv_masked_atomicrmw_xchg.
24765 if (AI->getOperation() == AtomicRMWInst::Xchg &&
24768 if (CVal->isZero())
24769 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
24770 Builder.CreateNot(Mask, "Inv_Mask"),
24771 AI->getAlign(), Ord);
24772 if (CVal->isMinusOne())
24773 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
24774 AI->getAlign(), Ord);
24775 }
24776
24777 unsigned XLen = Subtarget.getXLen();
24778 Value *Ordering =
24779 Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
24780 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24782 AI->getModule(),
24784
24785 if (XLen == 64) {
24786 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
24787 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24788 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
24789 }
24790
24791 Value *Result;
24792
24793 // Must pass the shift amount needed to sign extend the loaded value prior
24794 // to performing a signed comparison for min/max. ShiftAmt is the number of
24795 // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
24796 // is the number of bits to left+right shift the value in order to
24797 // sign-extend.
24798 if (AI->getOperation() == AtomicRMWInst::Min ||
24800 const DataLayout &DL = AI->getDataLayout();
24801 unsigned ValWidth =
24802 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
24803 Value *SextShamt =
24804 Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
24805 Result = Builder.CreateCall(LrwOpScwLoop,
24806 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
24807 } else {
24808 Result =
24809 Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
24810 }
24811
24812 if (XLen == 64)
24813 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24814 return Result;
24815}
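// Example (illustrative, not part of the upstream source): the xchg special
// case above means that
//
//   %old = atomicrmw xchg ptr %p, i8 0 monotonic
//
// is rewritten as an "atomicrmw and" of the containing word with ~Mask
// (clearing just the addressed byte), and an xchg with -1 becomes an
// "atomicrmw or" with Mask, both of which lower to a single amoand.w/amoor.w
// instead of an LR/SC loop.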
24816
24819 AtomicCmpXchgInst *CI) const {
24820 // Don't expand forced atomics, we want to have __sync libcalls instead.
24821 if (Subtarget.hasForcedAtomics())
24823
24825 if (!(Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas()) &&
24826 (Size == 8 || Size == 16))
24829}
24830
24832 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
24833 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
24834 unsigned XLen = Subtarget.getXLen();
24835 Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
24836 Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg;
24837 if (XLen == 64) {
24838 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
24839 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
24840 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
24841 }
24842 Type *Tys[] = {Builder.getIntNTy(XLen), AlignedAddr->getType()};
24843 Value *Result = Builder.CreateIntrinsic(
24844 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
24845 if (XLen == 64)
24846 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
24847 return Result;
24848}
24849
24851 EVT DataVT) const {
24852 // We have indexed loads for all supported EEW types. Indices are always
24853 // zero extended.
24854 return Extend.getOpcode() == ISD::ZERO_EXTEND &&
24855 isTypeLegal(Extend.getValueType()) &&
24856 isTypeLegal(Extend.getOperand(0).getValueType()) &&
24857 Extend.getOperand(0).getValueType().getVectorElementType() != MVT::i1;
24858}
24859
24861 EVT VT) const {
24862 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
24863 return false;
24864
24865 switch (FPVT.getSimpleVT().SimpleTy) {
24866 case MVT::f16:
24867 return Subtarget.hasStdExtZfhmin();
24868 case MVT::f32:
24869 return Subtarget.hasStdExtF();
24870 case MVT::f64:
24871 return Subtarget.hasStdExtD();
24872 default:
24873 return false;
24874 }
24875}
24876
24878 // If we are using the small code model, we can reduce the size of jump
24879 // table entries to 4 bytes.
24880 if (Subtarget.is64Bit() && !isPositionIndependent() &&
24883 }
24885}
24886
24888 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
24889 unsigned uid, MCContext &Ctx) const {
24890 assert(Subtarget.is64Bit() && !isPositionIndependent() &&
24892 return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
24893}
24894
24896 // We define vscale to be VLEN/RVVBitsPerBlock. VLEN is always a power
24897 // of two >= 64, and RVVBitsPerBlock is 64. Thus, vscale must be
24898 // a power of two as well.
24899 // FIXME: This doesn't work for zve32, but that's already broken
24900 // elsewhere for the same reason.
24901 assert(Subtarget.getRealMinVLen() >= 64 && "zve32* unsupported");
24902 static_assert(RISCV::RVVBitsPerBlock == 64,
24903 "RVVBitsPerBlock changed, audit needed");
24904 return true;
24905}
24906
24908 SDValue &Offset,
24910 SelectionDAG &DAG) const {
24911 // Target does not support indexed loads.
24912 if (!Subtarget.hasVendorXTHeadMemIdx())
24913 return false;
24914
24915 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
24916 return false;
24917
24918 Base = Op->getOperand(0);
24919 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
24920 int64_t RHSC = RHS->getSExtValue();
24921 if (Op->getOpcode() == ISD::SUB)
24922 RHSC = -(uint64_t)RHSC;
24923
24924 // The constants that can be encoded in the THeadMemIdx instructions
24925 // are of the form (sign_extend(imm5) << imm2).
24926 bool isLegalIndexedOffset = false;
24927 for (unsigned i = 0; i < 4; i++)
24928 if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
24929 isLegalIndexedOffset = true;
24930 break;
24931 }
24932
24933 if (!isLegalIndexedOffset)
24934 return false;
24935
24936 Offset = Op->getOperand(1);
24937 return true;
24938 }
24939
24940 return false;
24941}
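// Example (illustrative, not part of the upstream source): the
// (sign_extend(imm5) << imm2) check above accepts offsets such as
// 96 (== 12 << 3, with 12 fitting in a signed 5-bit field) or -64 (== -8 << 3),
// but rejects 200, because none of 200, 100, 50 or 25 fits in simm5 with a
// zero remainder at the corresponding shift amount.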
24942
24944 SDValue &Offset,
24946 SelectionDAG &DAG) const {
24947 EVT VT;
24948 SDValue Ptr;
24949 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24950 VT = LD->getMemoryVT();
24951 Ptr = LD->getBasePtr();
24952 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24953 VT = ST->getMemoryVT();
24954 Ptr = ST->getBasePtr();
24955 } else
24956 return false;
24957
24958 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, DAG))
24959 return false;
24960
24961 AM = ISD::PRE_INC;
24962 return true;
24963}
24964
24966 SDValue &Base,
24967 SDValue &Offset,
24969 SelectionDAG &DAG) const {
24970 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
24971 if (Op->getOpcode() != ISD::ADD)
24972 return false;
24973
24975 Base = LS->getBasePtr();
24976 else
24977 return false;
24978
24979 if (Base == Op->getOperand(0))
24980 Offset = Op->getOperand(1);
24981 else if (Base == Op->getOperand(1))
24982 Offset = Op->getOperand(0);
24983 else
24984 return false;
24985
24986 AM = ISD::POST_INC;
24987 return true;
24988 }
24989
24990 EVT VT;
24991 SDValue Ptr;
24992 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
24993 VT = LD->getMemoryVT();
24994 Ptr = LD->getBasePtr();
24995 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
24996 VT = ST->getMemoryVT();
24997 Ptr = ST->getBasePtr();
24998 } else
24999 return false;
25000
25001 if (!getIndexedAddressParts(Op, Base, Offset, AM, DAG))
25002 return false;
25003 // Post-indexing updates the base, so it's not a valid transform
25004 // if that's not the same as the load's pointer.
25005 if (Ptr != Base)
25006 return false;
25007
25008 AM = ISD::POST_INC;
25009 return true;
25010}
25011
25013 EVT VT) const {
25014 EVT SVT = VT.getScalarType();
25015
25016 if (!SVT.isSimple())
25017 return false;
25018
25019 switch (SVT.getSimpleVT().SimpleTy) {
25020 case MVT::f16:
25021 return VT.isVector() ? Subtarget.hasVInstructionsF16()
25022 : Subtarget.hasStdExtZfhOrZhinx();
25023 case MVT::f32:
25024 return Subtarget.hasStdExtFOrZfinx();
25025 case MVT::f64:
25026 return Subtarget.hasStdExtDOrZdinx();
25027 default:
25028 break;
25029 }
25030
25031 return false;
25032}
25033
25035 // Zacas will use amocas.w which does not require extension.
25036 return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
25037}
25038
25040 // Zaamo will use amo<op>.w which does not require extension.
25041 if (Subtarget.hasStdExtZaamo() || Subtarget.hasForcedAtomics())
25042 return ISD::ANY_EXTEND;
25043
25044 // Zalrsc pseudo expansions with comparison require sign-extension.
25045 assert(Subtarget.hasStdExtZalrsc());
25046 switch (Op) {
25051 return ISD::SIGN_EXTEND;
25052 default:
25053 break;
25054 }
25055 return ISD::ANY_EXTEND;
25056}
25057
25059 const Constant *PersonalityFn) const {
25060 return RISCV::X10;
25061}
25062
25064 const Constant *PersonalityFn) const {
25065 return RISCV::X11;
25066}
25067
25069 // Return false to suppress the unnecessary extensions if the LibCall
25070 // arguments or return value is a float narrower than XLEN on a soft FP ABI.
25071 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
25072 Type.getSizeInBits() < Subtarget.getXLen()))
25073 return false;
25074
25075 return true;
25076}
25077
25079 bool IsSigned) const {
25080 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
25081 return true;
25082
25083 return IsSigned;
25084}
25085
25087 SDValue C) const {
25088 // Check integral scalar types.
25089 if (!VT.isScalarInteger())
25090 return false;
25091
25092 // Omit the optimization if the subtarget has the Zmmul extension and the data
25093 // size exceeds XLen.
25094 const bool HasZmmul = Subtarget.hasStdExtZmmul();
25095 if (HasZmmul && VT.getSizeInBits() > Subtarget.getXLen())
25096 return false;
25097
25098 auto *ConstNode = cast<ConstantSDNode>(C);
25099 const APInt &Imm = ConstNode->getAPIntValue();
25100
25101 // Don't do this if the Xqciac extension is enabled and the Imm is a simm12.
25102 if (Subtarget.hasVendorXqciac() && Imm.isSignedIntN(12))
25103 return false;
25104
25105 // Break the MUL to a SLLI and an ADD/SUB.
25106 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
25107 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
25108 return true;
25109
25110 // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
25111 if (Subtarget.hasShlAdd(3) && !Imm.isSignedIntN(12) &&
25112 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
25113 (Imm - 8).isPowerOf2()))
25114 return true;
25115
25116 // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
25117 // a pair of LUI/ADDI.
25118 if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
25119 ConstNode->hasOneUse()) {
25120 APInt ImmS = Imm.ashr(Imm.countr_zero());
25121 if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
25122 (1 - ImmS).isPowerOf2())
25123 return true;
25124 }
25125
25126 return false;
25127}
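// Example (illustrative, not part of the upstream source): a scalar multiply
// by 17 is considered worth decomposing because 17 - 1 is a power of two, so
// instead of materialising 17 and issuing a mul, the combiner can emit
//
//   slli t0, a0, 4
//   add  a0, a0, t0
//
// Likewise a multiply by 4100 (not a simm12; equal to 1025 << 2 with
// 1025 - 1 a power of two) can be rebuilt from two shifts and an add rather
// than an LUI/ADDI constant plus a mul.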
25128
25130 SDValue ConstNode) const {
25131 // Let the DAGCombiner decide for vectors.
25132 EVT VT = AddNode.getValueType();
25133 if (VT.isVector())
25134 return true;
25135
25136 // Let the DAGCombiner decide for larger types.
25137 if (VT.getScalarSizeInBits() > Subtarget.getXLen())
25138 return true;
25139
25140 // It is worse if c1 is simm12 while c1*c2 is not.
25141 ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
25142 ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
25143 const APInt &C1 = C1Node->getAPIntValue();
25144 const APInt &C2 = C2Node->getAPIntValue();
25145 if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
25146 return false;
25147
25148 // Default to true and let the DAGCombiner decide.
25149 return true;
25150}
25151
25153 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
25154 unsigned *Fast) const {
25155 if (!VT.isVector()) {
25156 if (Fast)
25157 *Fast = Subtarget.enableUnalignedScalarMem();
25158 return Subtarget.enableUnalignedScalarMem();
25159 }
25160
25161 // All vector implementations must support element alignment
25162 EVT ElemVT = VT.getVectorElementType();
25163 if (Alignment >= ElemVT.getStoreSize()) {
25164 if (Fast)
25165 *Fast = 1;
25166 return true;
25167 }
25168
25169 // Note: We lower an unmasked unaligned vector access to an equally sized
25170 // e8 element type access. Given this, we effectively support all unmasked
25171 // misaligned accesses. TODO: Work through the codegen implications of
25172 // allowing such accesses to be formed, and considered fast.
25173 if (Fast)
25174 *Fast = Subtarget.enableUnalignedVectorMem();
25175 return Subtarget.enableUnalignedVectorMem();
25176}
25177
25179 LLVMContext &Context, const MemOp &Op,
25180 const AttributeList &FuncAttributes) const {
25181 if (!Subtarget.hasVInstructions())
25182 return MVT::Other;
25183
25184 if (FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat))
25185 return MVT::Other;
25186
25187 // We use LMUL1 memory operations here for a non-obvious reason. Our caller
25188 // has an expansion threshold, and we want the number of hardware memory
25189 // operations to correspond roughly to that threshold. LMUL>1 operations
25190 // are typically expanded linearly internally, and thus correspond to more
25191 // than one actual memory operation. Note that store merging and load
25192 // combining will typically form larger LMUL operations from the LMUL1
25193 // operations emitted here, and that's okay because combining isn't
25194 // introducing new memory operations; it's just merging existing ones.
25195 // NOTE: We limit to 1024 bytes to avoid creating an invalid MVT.
25196 const unsigned MinVLenInBytes =
25197 std::min(Subtarget.getRealMinVLen() / 8, 1024U);
25198
25199 if (Op.size() < MinVLenInBytes)
25200 // TODO: Figure out short memops. For the moment, do the default thing
25201 // which ends up using scalar sequences.
25202 return MVT::Other;
25203
25204 // If the minimum VLEN is no larger than RISCV::RVVBitsPerBlock, we don't
25205 // support fixed vectors.
25206 if (MinVLenInBytes <= RISCV::RVVBytesPerBlock)
25207 return MVT::Other;
25208
25209 // Prefer i8 for non-zero memset as it allows us to avoid materializing
25210 // a large scalar constant and instead use vmv.v.x/i to do the
25211 // broadcast. For everything else, prefer ELenVT to minimize VL and thus
25212 // maximize the chance we can encode the size in the vsetvli.
25213 MVT ELenVT = MVT::getIntegerVT(Subtarget.getELen());
25214 MVT PreferredVT = (Op.isMemset() && !Op.isZeroMemset()) ? MVT::i8 : ELenVT;
25215
25216 // Do we have sufficient alignment for our preferred VT? If not, revert
25217 // to largest size allowed by our alignment criteria.
25218 if (PreferredVT != MVT::i8 && !Subtarget.enableUnalignedVectorMem()) {
25219 Align RequiredAlign(PreferredVT.getStoreSize());
25220 if (Op.isFixedDstAlign())
25221 RequiredAlign = std::min(RequiredAlign, Op.getDstAlign());
25222 if (Op.isMemcpy())
25223 RequiredAlign = std::min(RequiredAlign, Op.getSrcAlign());
25224 PreferredVT = MVT::getIntegerVT(RequiredAlign.value() * 8);
25225 }
25226 return MVT::getVectorVT(PreferredVT, MinVLenInBytes / PreferredVT.getStoreSize());
25227}
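// Example (illustrative, not part of the upstream source): with VLEN=128 the
// logic above gives MinVLenInBytes = 16, so a 64-byte memcpy or zeroing
// memset with ELEN=64 is expanded with MVT::v2i64 operations (16 bytes per
// store), while a memset of a non-zero byte prefers MVT::v16i8 so the pattern
// can be splatted with vmv.v.x/vmv.v.i instead of materialising a wide scalar
// constant.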
25228
25230 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
25231 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
25232 bool IsABIRegCopy = CC.has_value();
25233 EVT ValueVT = Val.getValueType();
25234
25235 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
25236 if ((ValueVT == PairVT ||
25237 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
25238 ValueVT == MVT::f64)) &&
25239 NumParts == 1 && PartVT == MVT::Untyped) {
25240 // Register pairs in inline assembly, and f64 in inline assembly on rv32 with Zdinx
25241 MVT XLenVT = Subtarget.getXLenVT();
25242 if (ValueVT == MVT::f64)
25243 Val = DAG.getBitcast(MVT::i64, Val);
25244 auto [Lo, Hi] = DAG.SplitScalar(Val, DL, XLenVT, XLenVT);
25245 // Always creating an MVT::Untyped part, so always use
25246 // RISCVISD::BuildGPRPair.
25247 Parts[0] = DAG.getNode(RISCVISD::BuildGPRPair, DL, PartVT, Lo, Hi);
25248 return true;
25249 }
25250
25251 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
25252 PartVT == MVT::f32) {
25253 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
25254 // NaN, and cast to f32.
25255 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
25256 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
25257 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
25258 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
25259 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
25260 Parts[0] = Val;
25261 return true;
25262 }
25263
25264 if (ValueVT.isRISCVVectorTuple() && PartVT.isRISCVVectorTuple()) {
25265#ifndef NDEBUG
25266 unsigned ValNF = ValueVT.getRISCVVectorTupleNumFields();
25267 [[maybe_unused]] unsigned ValLMUL =
25269 ValNF * RISCV::RVVBitsPerBlock);
25270 unsigned PartNF = PartVT.getRISCVVectorTupleNumFields();
25271 [[maybe_unused]] unsigned PartLMUL =
25273 PartNF * RISCV::RVVBitsPerBlock);
25274 assert(ValNF == PartNF && ValLMUL == PartLMUL &&
25275 "RISC-V vector tuple type only accepts same register class type "
25276 "TUPLE_INSERT");
25277#endif
25278
25279 Val = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, PartVT, DAG.getUNDEF(PartVT),
25280 Val, DAG.getTargetConstant(0, DL, MVT::i32));
25281 Parts[0] = Val;
25282 return true;
25283 }
25284
25285 if (ValueVT.isFixedLengthVector() && PartVT.isScalableVector()) {
25286 ValueVT = getContainerForFixedLengthVector(ValueVT.getSimpleVT());
25287 Val = convertToScalableVector(ValueVT, Val, DAG, Subtarget);
25288
25289 LLVMContext &Context = *DAG.getContext();
25290 EVT ValueEltVT = ValueVT.getVectorElementType();
25291 EVT PartEltVT = PartVT.getVectorElementType();
25292 unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinValue();
25293 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
25294 if (PartVTBitSize % ValueVTBitSize == 0) {
25295 assert(PartVTBitSize >= ValueVTBitSize);
25296 // If the element types are different, bitcast to the same element type of
25297 // PartVT first.
25298 // For example, to copy a <vscale x 1 x i8> value into a
25299 // <vscale x 4 x i16>, we first widen it to <vscale x 8 x i8> with an
25300 // insert_subvector, and then bitcast the result to
25301 // <vscale x 4 x i16>.
25302 if (ValueEltVT != PartEltVT) {
25303 if (PartVTBitSize > ValueVTBitSize) {
25304 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
25305 assert(Count != 0 && "The number of elements should not be zero.");
25306 EVT SameEltTypeVT =
25307 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
25308 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(SameEltTypeVT), Val, 0);
25309 }
25310 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
25311 } else {
25312 Val = DAG.getInsertSubvector(DL, DAG.getUNDEF(PartVT), Val, 0);
25313 }
25314 Parts[0] = Val;
25315 return true;
25316 }
25317 }
25318
25319 return false;
25320}
25321
25323 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
25324 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
25325 bool IsABIRegCopy = CC.has_value();
25326
25327 MVT PairVT = Subtarget.is64Bit() ? MVT::i128 : MVT::i64;
25328 if ((ValueVT == PairVT ||
25329 (!Subtarget.is64Bit() && Subtarget.hasStdExtZdinx() &&
25330 ValueVT == MVT::f64)) &&
25331 NumParts == 1 && PartVT == MVT::Untyped) {
25332 // Register pairs in inline assembly, and f64 in inline assembly on rv32 with Zdinx
25333 MVT XLenVT = Subtarget.getXLenVT();
25334
25335 SDValue Val = Parts[0];
25336 // Always starting with an MVT::Untyped part, so always use
25337 // RISCVISD::SplitGPRPair
25338 Val = DAG.getNode(RISCVISD::SplitGPRPair, DL, DAG.getVTList(XLenVT, XLenVT),
25339 Val);
25340 Val = DAG.getNode(ISD::BUILD_PAIR, DL, PairVT, Val.getValue(0),
25341 Val.getValue(1));
25342 if (ValueVT == MVT::f64)
25343 Val = DAG.getBitcast(ValueVT, Val);
25344 return Val;
25345 }
25346
25347 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
25348 PartVT == MVT::f32) {
25349 SDValue Val = Parts[0];
25350
25351 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
25352 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
25353 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
25354 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
25355 return Val;
25356 }
25357
25358 if (ValueVT.isFixedLengthVector() && PartVT.isScalableVector()) {
25359 LLVMContext &Context = *DAG.getContext();
25360 SDValue Val = Parts[0];
25361 EVT ValueEltVT = ValueVT.getVectorElementType();
25362 EVT PartEltVT = PartVT.getVectorElementType();
25363
25364 unsigned ValueVTBitSize =
25366 .getSizeInBits()
25368
25369 unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinValue();
25370 if (PartVTBitSize % ValueVTBitSize == 0) {
25371 assert(PartVTBitSize >= ValueVTBitSize);
25372 EVT SameEltTypeVT = ValueVT;
25373 // If the element types are different, convert it to the same element type
25374 // of PartVT.
25375 // For example, to copy a <vscale x 1 x i8> value out of a
25376 // <vscale x 4 x i16>, we first bitcast the <vscale x 4 x i16> to
25377 // <vscale x 8 x i8>, and then extract the
25378 // <vscale x 1 x i8> subvector.
25379 if (ValueEltVT != PartEltVT) {
25380 unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
25381 assert(Count != 0 && "The number of elements should not be zero.");
25382 SameEltTypeVT =
25383 EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
25384 Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
25385 }
25386 if (ValueVT.isFixedLengthVector())
25387 Val = convertFromScalableVector(ValueVT, Val, DAG, Subtarget);
25388 else
25389 Val = DAG.getExtractSubvector(DL, ValueVT, Val, 0);
25390 return Val;
25391 }
25392 }
25393 return SDValue();
25394}
25395
25396bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
25397 // When aggressively optimizing for code size, we prefer to use a div
25398 // instruction, as it is usually smaller than the alternative sequence.
25399 // TODO: Add vector division?
25400 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
25401 return OptSize && !VT.isVector() &&
25403}
25404
25406 // Scalarizing zero_ext and sign_ext might prevent them from matching a
25407 // widening instruction in some situations.
25408 unsigned Opc = N->getOpcode();
25410 return false;
25411 return true;
25412}
25413
25414static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
25415 Module *M = IRB.GetInsertBlock()->getModule();
25416 Function *ThreadPointerFunc = Intrinsic::getOrInsertDeclaration(
25417 M, Intrinsic::thread_pointer, IRB.getPtrTy());
25418 return IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
25419 IRB.CreateCall(ThreadPointerFunc), Offset);
25420}
25421
25423 // Fuchsia provides a fixed TLS slot for the stack cookie.
25424 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
25425 if (Subtarget.isTargetFuchsia())
25426 return useTpOffset(IRB, -0x10);
25427
25428 // Android provides a fixed TLS slot for the stack cookie. See the definition
25429 // of TLS_SLOT_STACK_GUARD in
25430 // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h
25431 if (Subtarget.isTargetAndroid())
25432 return useTpOffset(IRB, -0x18);
25433
25434 Module *M = IRB.GetInsertBlock()->getModule();
25435
25436 if (M->getStackProtectorGuard() == "tls") {
25437 // Users must specify the offset explicitly
25438 int Offset = M->getStackProtectorGuardOffset();
25439 return useTpOffset(IRB, Offset);
25440 }
25441
25443}
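// Example (illustrative, not part of the upstream source): on Android the
// hook above materialises the guard address roughly as
//
//   %tp    = call ptr @llvm.thread.pointer()
//   %guard = getelementptr i8, ptr %tp, i32 -24    ; TLS_SLOT_STACK_GUARD
//
// and on Fuchsia the offset is -16 (ZX_TLS_STACK_GUARD_OFFSET). Otherwise the
// default __stack_chk_guard global, or the module-level "tls" guard with a
// user-provided offset, is used.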
25444
25446 Align Alignment) const {
25447 if (!Subtarget.hasVInstructions())
25448 return false;
25449
25450 // Only support fixed vectors if we know the minimum vector size.
25451 if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
25452 return false;
25453
25454 EVT ScalarType = DataType.getScalarType();
25455 if (!isLegalElementTypeForRVV(ScalarType))
25456 return false;
25457
25458 if (!Subtarget.enableUnalignedVectorMem() &&
25459 Alignment < ScalarType.getStoreSize())
25460 return false;
25461
25462 return true;
25463}
25464
25466 Align Alignment) const {
25467 if (!Subtarget.hasVInstructions())
25468 return false;
25469
25470 EVT ScalarType = DataType.getScalarType();
25471 if (!isLegalElementTypeForRVV(ScalarType))
25472 return false;
25473
25474 if (!Subtarget.enableUnalignedVectorMem() &&
25475 Alignment < ScalarType.getStoreSize())
25476 return false;
25477
25478 return true;
25479}
25480
25484 const TargetInstrInfo *TII) const {
25485 assert(MBBI->isCall() && MBBI->getCFIType() &&
25486 "Invalid call instruction for a KCFI check");
25487 assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
25488 MBBI->getOpcode()));
25489
25490 MachineOperand &Target = MBBI->getOperand(0);
25491 Target.setIsRenamable(false);
25492
25493 return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
25494 .addReg(Target.getReg())
25495 .addImm(MBBI->getCFIType())
25496 .getInstr();
25497}
25498
25499#define GET_REGISTER_MATCHER
25500#include "RISCVGenAsmMatcher.inc"
25501
25504 const MachineFunction &MF) const {
25506 if (!Reg)
25508 if (!Reg)
25509 return Reg;
25510
25511 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
25512 if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
25513 reportFatalUsageError(Twine("Trying to obtain non-reserved register \"" +
25514 StringRef(RegName) + "\"."));
25515 return Reg;
25516}
25517
25520 const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
25521
25522 if (NontemporalInfo == nullptr)
25524
25525 // 1 is the default value and works as __RISCV_NTLH_ALL
25526 // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
25527 // 3 -> __RISCV_NTLH_ALL_PRIVATE
25528 // 4 -> __RISCV_NTLH_INNERMOST_SHARED
25529 // 5 -> __RISCV_NTLH_ALL
25530 int NontemporalLevel = 5;
25531 const MDNode *RISCVNontemporalInfo =
25532 I.getMetadata("riscv-nontemporal-domain");
25533 if (RISCVNontemporalInfo != nullptr)
25534 NontemporalLevel =
25536 cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
25537 ->getValue())
25538 ->getZExtValue();
25539
25540 assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
25541 "RISC-V target doesn't support this non-temporal domain.");
25542
25543 NontemporalLevel -= 2;
25545 if (NontemporalLevel & 0b1)
25546 Flags |= MONontemporalBit0;
25547 if (NontemporalLevel & 0b10)
25548 Flags |= MONontemporalBit1;
25549
25550 return Flags;
25551}
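// Example (illustrative, not part of the upstream source; the exact metadata
// shape is an assumption): a nontemporal store tagged as
//
//   store i32 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
//   !0 = !{i32 1}
//   !1 = !{i32 3}   ; __RISCV_NTLH_ALL_PRIVATE
//
// has domain 3 mapped above to 3 - 2 == 1, so only MONontemporalBit0 is set
// on the memory operand, which later selects the corresponding ntl.* hint
// instruction.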
25552
25555
25556 MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
25558 TargetFlags |= (NodeFlags & MONontemporalBit0);
25559 TargetFlags |= (NodeFlags & MONontemporalBit1);
25560 return TargetFlags;
25561}
25562
25564 const MemSDNode &NodeX, const MemSDNode &NodeY) const {
25565 return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
25566}
25567
25569 if (VT.isVector()) {
25570 EVT SVT = VT.getVectorElementType();
25571 // If the element type is legal we can use cpop.v if it is enabled.
25572 if (isLegalElementTypeForRVV(SVT))
25573 return Subtarget.hasStdExtZvbb();
25574 // Don't consider it fast if the type needs to be legalized or scalarized.
25575 return false;
25576 }
25577
25578 return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
25579}
25580
25582 ISD::CondCode Cond) const {
25583 return isCtpopFast(VT) ? 0 : 1;
25584}
25585
25587 const Instruction *I) const {
25588 if (Subtarget.hasStdExtZalasr()) {
25589 if (Subtarget.hasStdExtZtso()) {
25590 // Zalasr + TSO means that atomic_load_acquire and atomic_store_release
25591 // should be lowered to plain load/store. The easiest way to do this is
25592 // to say we should insert fences for them, and the fence insertion code
25593 // will just not insert any fences.
25594 auto *LI = dyn_cast<LoadInst>(I);
25595 auto *SI = dyn_cast<StoreInst>(I);
25596 if ((LI &&
25597 (LI->getOrdering() == AtomicOrdering::SequentiallyConsistent)) ||
25598 (SI &&
25599 (SI->getOrdering() == AtomicOrdering::SequentiallyConsistent))) {
25600 // Here, this is a load or store which is seq_cst and needs a .aq or
25601 // .rl, therefore we shouldn't try to insert fences.
25602 return false;
25603 }
25604 // Here, we are a TSO inst that isn't a seq_cst load/store
25605 return isa<LoadInst>(I) || isa<StoreInst>(I);
25606 }
25607 return false;
25608 }
25609 // Note that one specific case requires fence insertion for an
25610 // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
25611 // than this hook due to limitations in the interface here.
25612 return isa<LoadInst>(I) || isa<StoreInst>(I);
25613}
25614
25616
25617 // GISel support is in progress or complete for these opcodes.
25618 unsigned Op = Inst.getOpcode();
25619 if (Op == Instruction::Add || Op == Instruction::Sub ||
25620 Op == Instruction::And || Op == Instruction::Or ||
25621 Op == Instruction::Xor || Op == Instruction::InsertElement ||
25622 Op == Instruction::ShuffleVector || Op == Instruction::Load ||
25623 Op == Instruction::Freeze || Op == Instruction::Store)
25624 return false;
25625
25626 if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
25627 // Mark RVV intrinsic as supported.
25628 if (RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(II->getIntrinsicID())) {
25629 // GISel doesn't support tuple types yet. It also doesn't support returning
25630 // a struct containing a scalable vector like vleff.
25631 if (Inst.getType()->isRISCVVectorTupleTy() ||
25632 Inst.getType()->isStructTy())
25633 return true;
25634
25635 for (unsigned i = 0; i < II->arg_size(); ++i)
25636 if (II->getArgOperand(i)->getType()->isRISCVVectorTupleTy())
25637 return true;
25638
25639 return false;
25640 }
25641 if (II->getIntrinsicID() == Intrinsic::vector_extract)
25642 return false;
25643 }
25644
25645 if (Inst.getType()->isScalableTy())
25646 return true;
25647
25648 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
25649 if (Inst.getOperand(i)->getType()->isScalableTy() &&
25650 !isa<ReturnInst>(&Inst))
25651 return true;
25652
25653 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
25654 if (AI->getAllocatedType()->isScalableTy())
25655 return true;
25656 }
25657
25658 return false;
25659}
25660
25661SDValue
25662RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
25663 SelectionDAG &DAG,
25664 SmallVectorImpl<SDNode *> &Created) const {
25665 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
25666 if (isIntDivCheap(N->getValueType(0), Attr))
25667 return SDValue(N, 0); // Lower SDIV as SDIV
25668
25669 // Only perform this transform if short forward branch opt is supported.
25670 if (!Subtarget.hasShortForwardBranchIALU())
25671 return SDValue();
25672 EVT VT = N->getValueType(0);
25673 if (!(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit())))
25674 return SDValue();
25675
25676 // Ensure 2**k-1 < 2048 so that we can just emit a single addi/addiw.
25677 if (Divisor.sgt(2048) || Divisor.slt(-2048))
25678 return SDValue();
25679 return TargetLowering::buildSDIVPow2WithCMov(N, Divisor, DAG, Created);
25680}
25681
25682bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
25683 EVT VT, const APInt &AndMask) const {
25684 if (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())
25685 return !Subtarget.hasBEXTILike() && AndMask.ugt(1024);
25687}
25688
25690 return Subtarget.getMinimumJumpTableEntries();
25691}
25692
25694 SDValue Value, SDValue Addr,
25695 int JTI,
25696 SelectionDAG &DAG) const {
25697 if (Subtarget.hasStdExtZicfilp()) {
25698 // When Zicfilp is enabled, we need to use a software-guarded branch for the
25699 // jump table branch.
25700 SDValue Chain = Value;
25701 // Jump table debug info is only needed if CodeView is enabled.
25703 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
25704 return DAG.getNode(RISCVISD::SW_GUARDED_BRIND, dl, MVT::Other, Chain, Addr);
25705 }
25706 return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, JTI, DAG);
25707}
25708
25709 // If an output pattern produces multiple instructions, tablegen may pick an
25710 // arbitrary type from an instruction's destination register class to use for the
25711// VT of that MachineSDNode. This VT may be used to look up the representative
25712// register class. If the type isn't legal, the default implementation will
25713// not find a register class.
25714//
25715// Some integer types smaller than XLen are listed in the GPR register class to
25716// support isel patterns for GISel, but are not legal in SelectionDAG. The
25717// arbitrary type tablegen picks may be one of these smaller types.
25718//
25719// f16 and bf16 are both valid for the FPR16 or GPRF16 register class. It's
25720// possible for tablegen to pick bf16 as the arbitrary type for an f16 pattern.
25721std::pair<const TargetRegisterClass *, uint8_t>
25722RISCVTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
25723 MVT VT) const {
25724 switch (VT.SimpleTy) {
25725 default:
25726 break;
25727 case MVT::i8:
25728 case MVT::i16:
25729 case MVT::i32:
25731 case MVT::bf16:
25732 case MVT::f16:
25734 }
25735
25737}
25738
25740
25741#define GET_RISCVVIntrinsicsTable_IMPL
25742#include "RISCVGenSearchableTables.inc"
25743
25744} // namespace llvm::RISCVVIntrinsicsTable
25745
25747
25748 // If the function specifically requests inline stack probes, emit them.
25749 if (MF.getFunction().hasFnAttribute("probe-stack"))
25750 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
25751 "inline-asm";
25752
25753 return false;
25754}
25755
25757 Align StackAlign) const {
25758 // The default stack probe size is 4096 if the function has no
25759 // stack-probe-size attribute.
25760 const Function &Fn = MF.getFunction();
25761 unsigned StackProbeSize =
25762 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
25763 // Round down to the stack alignment.
25764 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
25765 return StackProbeSize ? StackProbeSize : StackAlign.value();
25766}
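// Example (illustrative, not part of the upstream source): with a 16-byte
// stack alignment, "stack-probe-size"="5000" is rounded down to 4992, while
// "stack-probe-size"="8" rounds down to 0 and therefore falls back to the
// stack alignment itself (16) via the final select above.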
25767
25768SDValue RISCVTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
25769 SelectionDAG &DAG) const {
25771 if (!hasInlineStackProbe(MF))
25772 return SDValue();
25773
25774 MVT XLenVT = Subtarget.getXLenVT();
25775 // Get the inputs.
25776 SDValue Chain = Op.getOperand(0);
25777 SDValue Size = Op.getOperand(1);
25778
25780 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
25781 SDLoc dl(Op);
25782 EVT VT = Op.getValueType();
25783
25784 // Construct the new SP value in a GPR.
25785 SDValue SP = DAG.getCopyFromReg(Chain, dl, RISCV::X2, XLenVT);
25786 Chain = SP.getValue(1);
25787 SP = DAG.getNode(ISD::SUB, dl, XLenVT, SP, Size);
25788 if (Align)
25789 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
25790 DAG.getSignedConstant(-Align->value(), dl, VT));
25791
25792 // Set the real SP to the new value with a probing loop.
25793 Chain = DAG.getNode(RISCVISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
25794 return DAG.getMergeValues({SP, Chain}, dl);
25795}
25796
25799 MachineBasicBlock *MBB) const {
25800 MachineFunction &MF = *MBB->getParent();
25801 MachineBasicBlock::iterator MBBI = MI.getIterator();
25802 DebugLoc DL = MBB->findDebugLoc(MBBI);
25803 Register TargetReg = MI.getOperand(0).getReg();
25804
25805 const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
25806 bool IsRV64 = Subtarget.is64Bit();
25807 Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
25808 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
25809 uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
25810
25811 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
25812 MachineBasicBlock *LoopTestMBB =
25813 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25814 MF.insert(MBBInsertPoint, LoopTestMBB);
25815 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
25816 MF.insert(MBBInsertPoint, ExitMBB);
25817 Register SPReg = RISCV::X2;
25818 Register ScratchReg =
25819 MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
25820
25821 // ScratchReg = ProbeSize
25822 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
25823
25824 // LoopTest:
25825 // SUB SP, SP, ProbeSize
25826 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
25827 .addReg(SPReg)
25828 .addReg(ScratchReg);
25829
25830 // s[d|w] zero, 0(sp)
25831 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
25832 TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
25833 .addReg(RISCV::X0)
25834 .addReg(SPReg)
25835 .addImm(0);
25836
25837 // BLT TargetReg, SP, LoopTest
25838 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BLT))
25839 .addReg(TargetReg)
25840 .addReg(SPReg)
25841 .addMBB(LoopTestMBB);
25842
25843 // Adjust with: MV SP, TargetReg.
25844 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(RISCV::ADDI), SPReg)
25845 .addReg(TargetReg)
25846 .addImm(0);
25847
25848 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
25850
25851 LoopTestMBB->addSuccessor(ExitMBB);
25852 LoopTestMBB->addSuccessor(LoopTestMBB);
25853 MBB->addSuccessor(LoopTestMBB);
25854
25855 MI.eraseFromParent();
25856 MF.getInfo<RISCVMachineFunctionInfo>()->setDynamicAllocation();
25857 return ExitMBB->begin()->getParent();
25858}
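// Example (illustrative sketch of the probe loop built above, assuming RV64,
// the default 4096-byte probe size, and t0/t1 standing in for ScratchReg and
// TargetReg):
//
//           li   t0, 4096            # ScratchReg = ProbeSize
//   .LloopTest:
//           sub  sp, sp, t0
//           sd   zero, 0(sp)         # probe the newly exposed page
//           blt  t1, sp, .LloopTest
//           mv   sp, t1              # ADDI sp, TargetReg, 0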
25859
25861 if (Subtarget.hasStdExtFOrZfinx()) {
25862 static const MCPhysReg RCRegs[] = {RISCV::FRM, RISCV::FFLAGS};
25863 return RCRegs;
25864 }
25865 return {};
25866}
25867
25869 EVT VT = Y.getValueType();
25870
25871 if (VT.isVector())
25872 return false;
25873
25874 return VT.getSizeInBits() <= Subtarget.getXLen();
25875}
25876
25878 SDValue N1) const {
25879 if (!N0.hasOneUse())
25880 return false;
25881
25882 // Avoid reassociating expressions that can be lowered to vector
25883 // multiply-accumulate (i.e. add (mul x, y), z).
25884 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::MUL &&
25885 (N0.getValueType().isVector() && Subtarget.hasVInstructions()))
25886 return false;
25887
25888 return true;
25889}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isConstant(const MachineInstr &MI)
AMDGPU Register Bank Select
static bool isZeroOrAllOnes(SDValue N, bool AllOnes)
static SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static constexpr unsigned long long mask(BlockVerifier::State S)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static InstructionCost getCost(Instruction &Inst, TTI::TargetCostKind CostKind, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Definition CostModel.cpp:74
#define Check(C,...)
#define DEBUG_TYPE
#define im(i)
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define CC_VLS_CASE(ABIVlen)
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static StringRef getName(Value *V)
static constexpr MCPhysReg SPReg
static StringRef getExtensionType(StringRef Ext)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG)
static SDValue combinePExtTruncate(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB, unsigned RelOpcode, unsigned EqOpcode, const RISCVSubtarget &Subtarget)
static void processVCIXOperands(SDValue OrigOp, MutableArrayRef< SDValue > Operands, SelectionDAG &DAG)
static bool isLowSourceShuffle(ArrayRef< int > Mask, int Span)
Is this mask only using elements from the first span of the input?
static bool isZipOdd(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipodd idiom.
static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue performVECREDUCECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match v(f)slide1up/down idioms.
static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< APInt > getExactInteger(const APFloat &APF, uint32_t BitWidth)
static SDValue performVP_TRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isInterleaveShuffle(ArrayRef< int > Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget)
Is this shuffle interleaving contiguous elements from one vector into the even elements and contiguou...
static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG)
According to the property that indexed load/store instructions zero-extend their indices,...
static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2)
static unsigned getPACKOpcode(unsigned DestBW, const RISCVSubtarget &Subtarget)
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Scalar, SDValue VL, SelectionDAG &DAG)
static bool isLegalBitRotate(ArrayRef< int > Mask, EVT VT, const RISCVSubtarget &Subtarget, MVT &RotateVT, unsigned &RotateAmt)
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, SDValue Lo, SDValue Hi, SDValue VL, SelectionDAG &DAG)
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, SelectionDAG &DAG)
Creates an all ones mask suitable for masking a vector of type VecTy with vector length VL.
static SDValue simplifyOp_VL(SDNode *N)
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isAlternating(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned Factor, bool RequiredPolarity)
static cl::opt< int > FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden, cl::desc("Give the maximum number of instructions that we will " "use for creating a floating-point immediate value"), cl::init(3))
static const RISCV::RISCVMaskedPseudoInfo * lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVVType::VLMUL LMul, unsigned SEW)
static SDValue expandMul(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask, Align BaseAlign, const RISCVSubtarget &ST)
Match the index of a gather or scatter operation as an operation with twice the element width and hal...
static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
Combine a binary or FMA operation to its equivalent VW or VW_W form.
static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG)
static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SelectionDAG &DAG)
static const unsigned ZvfbfaVPOps[]
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< bool > ReassocShlAddiAdd("reassoc-shl-addi-add", cl::Hidden, cl::desc("Swap add and addi in cases where the add may " "be combined with a shift"), cl::init(true))
static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Given a shuffle where the indices are disjoint between the two sources, e.g.:
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second, MachineBasicBlock *ThisMBB, const RISCVSubtarget &Subtarget)
static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue reverseZExtICmpCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG)
static void promoteVCIXScalar(SDValue Op, MutableArrayRef< SDValue > Operands, SelectionDAG &DAG)
static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG)
static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue reduceANDOfAtomicLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static unsigned getRVVReductionOp(unsigned ISDOpcode)
static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > NumRepeatedDivisors(DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden, cl::desc("Set the minimum number of repetitions of a divisor to allow " "transformation to multiplications by the reciprocal"), cl::init(2))
static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG)
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineVectorMulToSraBitcast(SDNode *N, SelectionDAG &DAG)
static bool isLocalRepeatingShuffle(ArrayRef< int > Mask, int Span)
Is this mask local (i.e. elements only move within their local span) and repeating (i.e. the same rearrangement is performed within each span)?
static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, ISD::MemIndexType &IndexType, RISCVTargetLowering::DAGCombinerInfo &DCI)
static bool isSpanSplatShuffle(ArrayRef< int > Mask, int Span)
Return true for a mask which performs an arbitrary shuffle within the first span, and then repeats that same result across all remaining spans.
static SDValue getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static SDValue combineOrToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVCIXISDNodeVOID(SDValue Op, SelectionDAG &DAG, unsigned Type)
static unsigned getRISCVVLOp(SDValue Op)
Get a RISC-V target specified VL op for a given SDNode.
static unsigned getVecReduceOpcode(unsigned Opc)
Given a binary operator, return the associative generic ISD::VECREDUCE_OP which corresponds to it.
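For illustration, a reduced sketch of the kind of opcode mapping such a helper performs (hypothetical helper name; only a few integer cases are shown, not the full set handled in this file):
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

// Map a binary ISD opcode to the matching vector-reduction opcode.
static unsigned vecReduceOpcodeSketch(unsigned Opc) {
  switch (Opc) {
  case ISD::ADD:  return ISD::VECREDUCE_ADD;
  case ISD::AND:  return ISD::VECREDUCE_AND;
  case ISD::OR:   return ISD::VECREDUCE_OR;
  case ISD::XOR:  return ISD::VECREDUCE_XOR;
  case ISD::SMAX: return ISD::VECREDUCE_SMAX;
  case ISD::UMAX: return ISD::VECREDUCE_UMAX;
  default:
    llvm_unreachable("unhandled binary operator");
  }
}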
static std::pair< SDValue, SDValue > getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static const unsigned ZvfbfaOps[]
static bool isPromotedOpNeedingSplit(SDValue Op, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT, SDValue StartValue, SDValue Vec, SDValue Mask, SDValue VL, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Helper to lower a reduction sequence of the form: scalar = reduce_op vec, scalar_start.
static SDValue expandMulToAddOrSubOfShl(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::pair< SDValue, SDValue > getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getVLOperand(SDValue Op)
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB, const RISCVSubtarget &Subtarget)
static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
const uint64_t ModeMask64
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static cl::opt< unsigned > ExtensionMaxWebSize(DEBUG_TYPE "-ext-max-web-size", cl::Hidden, cl::desc("Give the maximum size (in number of nodes) of the web of " "instructions that we will consider for VW expansion"), cl::init(18))
static SDValue combineShlAddIAddImpl(SDNode *N, SDValue AddI, SDValue Other, SelectionDAG &DAG)
static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT, SDValue Src, unsigned Factor, unsigned Index, SelectionDAG &DAG)
static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG)
static bool matchSelectAddSub(SDValue TrueVal, SDValue FalseVal, bool &SwapCC)
static SDValue performSIGN_EXTEND_INREGCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineANDOfSETCCToCZERO(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< MVT > getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool useRVVForFixedLengthVectorVT(MVT VT, const RISCVSubtarget &Subtarget)
static bool isValidVisniInsertExtractIndex(SDValue Idx)
static Value * useTpOffset(IRBuilderBase &IRB, unsigned Offset)
static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG)
static SDValue getZeroPaddedAdd(const SDLoc &DL, SDValue A, SDValue B, SelectionDAG &DAG)
Given fixed length vectors A and B with equal element types, but possibly different numbers of elements, return A + B where either A or B is zero padded up to the larger number of elements.
const uint32_t ModeMask32
static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG)
static SDValue getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask, SDValue VL, unsigned Policy=RISCVVType::TAIL_UNDISTURBED_MASK_UNDISTURBED)
static MachineBasicBlock * emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const RISCVSubtarget &Subtarget)
static SDValue combineVqdotAccum(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static MachineBasicBlock * emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned CVTXOpc)
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG)
static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc)
static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaVID(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, SDValue TrueVal, SDValue FalseVal, bool Swapped)
#define VP_CASE(NODE)
static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask, SmallVector< int > &ShuffleMask)
Match the index vector of a scatter or gather node as the shuffle mask which performs the rearrangement.
static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue lowerFixedVectorSegStoreIntrinsics(unsigned IntNo, SDValue Op, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG)
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue lowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static std::optional< VIDSequence > isSimpleVIDSequence(SDValue Op, unsigned EltSizeInBits)
static SDValue getVCIXISDNodeWCHAIN(SDValue Op, SelectionDAG &DAG, unsigned Type)
static SDValue lowerVectorXRINT_XROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC)
static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isSimm12Constant(SDValue V)
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc)
static SDValue lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG)
static bool isElementRotate(const std::array< std::pair< int, int >, 2 > &SrcInfo, unsigned NumElts)
static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG)
static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isValidEGW(int EGS, EVT VT, const RISCVSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
Match a single source shuffle which is an identity except that some particular element is repeated.
static bool isNonZeroAVL(SDValue AVL)
static SDValue lowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue getShlAddShlAdd(SDNode *N, SelectionDAG &DAG, unsigned ShX, unsigned ShY, bool AddX, unsigned Shift)
static MVT getQDOTXResultType(MVT OpVT)
static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef< int > Mask, const RISCVSubtarget &Subtarget, SelectionDAG &DAG)
static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG)
static MachineBasicBlock * emitReadCounterWidePseudo(MachineInstr &MI, MachineBasicBlock *BB)
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index, const SDLoc &DL, SelectionDAG &DAG)
static cl::opt< bool > AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden, cl::desc("Allow the formation of VW_W operations (e.g., " "VWADD_W) with splat constants"), cl::init(false))
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static SDValue foldConcatVector(SDValue V1, SDValue V2)
If concat_vector(V1,V2) could be folded away to some existing vector source, return it.
static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1, LSBaseSDNode *LSNode2, SDValue BasePtr, uint64_t Imm)
static std::tuple< unsigned, SDValue, SDValue > getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT, const RISCVSubtarget &Subtarget)
static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget)
static SDValue combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Perform two related transforms whose purpose is to incrementally recognize an explode_vector followed by scalar reduction as a vector reduction node.
static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Double the element size of the build vector to reduce the number of vslide1down operations in the build vector chain.
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue combineShlAddIAdd(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try and optimize BUILD_VECTORs with "dominant values" - these are values which constitute a large proportion of the elements.
static bool isCompressMask(ArrayRef< int > Mask)
static SDValue expandMulToNAFSequence(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt)
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static bool isZipEven(const std::array< std::pair< int, int >, 2 > &SrcInfo, ArrayRef< int > Mask, unsigned &Factor)
Given a shuffle which can be represented as a pair of two slides, see if it is a zipeven idiom.
static SDValue expandMulToShlAddShlAdd(SDNode *N, SelectionDAG &DAG, uint64_t MulAmt, unsigned Shift)
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization splits it up.
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
If we have a build_vector where each lane is binop X, C, where C is a constant (but not necessarily the same constant on all lanes), form binop (build_vector X1, X2, ...), (build_vector C1, C2, ...).
#define OP_CASE(NODE)
static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
static LLT getMaskTypeFor(LLT VecTy)
Return the mask type suitable for masking the provided vector type.
static unsigned getRISCVWOpcode(unsigned Opcode)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
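Typical LLVM_DEBUG usage for reference (illustrative sketch; the DEBUG_TYPE string and function are placeholders, not taken from this file):
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "my-example-pass"

// Emits output only in debug builds when -debug-only=my-example-pass is set.
static void reportCount(int Count) {
  LLVM_DEBUG(llvm::dbgs() << "processed " << Count << " nodes\n");
}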
static constexpr int Concat[]
Value * RHS
Value * LHS
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI unsigned int semanticsPrecision(const fltSemantics &)
Definition APFloat.cpp:290
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1329
opStatus convertToInteger(MutableArrayRef< integerPart > Input, unsigned int Width, bool IsSigned, roundingMode RM, bool *IsExact) const
Definition APFloat.h:1314
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1091
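A minimal standalone sketch of the APFloat operations listed above (assumes the llvm/ADT/APFloat.h and llvm/ADT/APSInt.h APIs; the values are illustrative):
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
using namespace llvm;

static void apfloatSketch() {
  // Build a single-precision float from the integer 7, rounding to nearest-even.
  APFloat F(0.0f);
  F.convertFromAPInt(APInt(32, 7), /*IsSigned=*/true,
                     APFloat::rmNearestTiesToEven);

  // Convert back to a 32-bit signed integer; IsExact reports lossless conversion.
  APSInt Result(32, /*isUnsigned=*/false);
  bool IsExact = false;
  F.convertToInteger(Result, APFloat::rmNearestTiesToEven, &IsExact);

  // Produce a quiet NaN with the same semantics.
  APFloat NaN = APFloat::getNaN(APFloat::IEEEsingle());
  (void)NaN;
}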
Class for arbitrary precision integers.
Definition APInt.h:78
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1202
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
LLVM_ABI APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Definition APInt.cpp:1644
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1397
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1532
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1736
bool isMask(unsigned numBits) const
Definition APInt.h:489
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1131
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1389
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1222
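A minimal standalone sketch exercising several of the APInt helpers listed above (assumes llvm/ADT/APInt.h; the values are arbitrary):
#include "llvm/ADT/APInt.h"
using namespace llvm;

static void apintSketch() {
  APInt A(32, 42);                                    // 32-bit value 42
  bool IsPow2 = A.isPowerOf2();                       // false
  uint64_t Z = A.getZExtValue();                      // 42
  APInt Low = APInt::getLowBitsSet(32, 8);            // 0x000000FF
  APInt High = APInt::getHighBitsSet(32, 4);          // 0xF0000000
  APInt Splat = APInt::getSplat(64, APInt(8, 0xAA));  // 0xAA repeated to 64 bits
  unsigned TrailingZeros = High.countr_zero();        // 28
  APInt Wide = A.sext(64);                            // sign-extend to 64 bits
  (void)IsPow2; (void)Z; (void)Low; (void)Splat; (void)TrailingZeros; (void)Wide;
}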
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
an instruction to allocate memory on the stack
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
bool isFloatingPointOperation() const
BinOp getOperation() const
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does not have a module.
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & set()
Definition BitVector.h:370
bool all() const
all - Returns true if all bits are set.
Definition BitVector.h:194
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set, or Regs.size() if they are all allocated.
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed values into this state.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals into this state.
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
int64_t getLocMemOffset() const
Base class for all callable instructions (InvokeInst and CallInst); holds everything related to calling a function.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly not equal, like -0.0 and 0.0.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for its type.
Definition Constants.h:168
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:490
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:224
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Tagged union holding either a T or a Error.
Definition Error.h:485
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:777
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Argument * getArg(unsigned i) const
Definition Function.h:884
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
Helper struct to store a base, index and offset that forms an address.
bool isDSOLocal() const
bool hasExternalWeakLinkage() const
Module * getParent()
Get the module that this global value is contained inside of...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition IRBuilder.h:1939
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2511
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:605
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
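A minimal standalone sketch of the IRBuilder helpers listed above (the basic block, callee, and pointer arguments are assumed to exist already; the names are placeholders):
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *irBuilderSketch(BasicBlock *BB, FunctionCallee Callee,
                              Value *Ptr) {
  IRBuilder<> IRB(BB);                 // insert at the end of BB
  // Address 16 bytes past Ptr, indexed as an i8 array.
  Value *Off = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, 16);
  // Opaque pointer type in the default address space.
  PointerType *PtrTy = IRB.getPtrTy();
  (void)PtrTy;
  // Call Callee with the computed address as its only argument.
  return IRB.CreateCall(Callee, {Off});
}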
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Base class for LoadSDNode and StoreSDNode.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
MCContext & getContext() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1078
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1442
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
bool isRISCVVectorTuple() const
Return true if this is a RISCV vector tuple type where the runtime length is machine dependent.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type.
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
static MVT getRISCVVectorTupleVT(unsigned Sz, unsigned NFields)
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements)
unsigned getRISCVVectorTupleNumFields() const
Given a RISC-V vector tuple type, return the num_fields.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
bool bitsLT(MVT VT) const
Return true if this has less bits than VT.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
LLVM_ABI const fltSemantics & getFltSemantics() const
Returns an APFloat semantics tag appropriate for the value type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool bitsGE(MVT VT) const
Return true if this has no less bits than VT.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
bool isValid() const
Return true if this is a valid simple valuetype.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
static auto integer_scalable_vector_valuetypes()
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type of the same bit width.
static auto fp_fixedlen_vector_valuetypes()
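A minimal standalone sketch of the MVT queries listed above (the include path is the one used by recent LLVM releases; it has moved between versions):
#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;

static void mvtSketch() {
  MVT V = MVT::getVectorVT(MVT::i32, 4);           // v4i32, fixed length
  MVT S = MVT::getScalableVectorVT(MVT::i32, 2);   // nxv2i32, scalable
  bool Fixed = V.isFixedLengthVector();            // true
  bool Scalable = S.isScalableVector();            // true
  MVT Elt = V.getVectorElementType();              // MVT::i32
  MVT F = MVT::f32;
  MVT AsInt = F.changeTypeToInteger();             // MVT::i32
  (void)Fixed; (void)Scalable; (void)Elt; (void)AsInt;
}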
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor blocks that refer to FromMBB so they refer to this block.
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual register for it.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
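A minimal sketch of the MachineInstrBuilder chaining pattern listed above (the opcode, registers, and immediate are placeholders; this is not code from this file):
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

// Emit "Dst = <Opcode> Src, 16" before the iterator I.
static void emitRegImmOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                         const DebugLoc &DL, const TargetInstrInfo *TII,
                         unsigned Opcode, Register Dst, Register Src) {
  BuildMI(MBB, I, DL, TII->get(Opcode), Dst)
      .addReg(Src)
      .addImm(16);
}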
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
void setFlag(MIFlag Flag)
Set a MI flag.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJumpTableEntry hook.
A description of a memory reference used in the backend.
const MDNode * getRanges() const
Return the range tag for the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
Metadata * getModuleFlag(StringRef Key) const
Return the corresponding value if Key appears in module flags, otherwise return null.
Definition Module.cpp:353
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:298
A RISCV-specific constant pool value.
static RISCVConstantPoolValue * Create(const GlobalValue *GV)
RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private RISCV-specific information for each MachineFunction.
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsI64() const
bool hasVInstructionsF64() const
bool hasStdExtZfhOrZhinx() const
bool hasShlAdd(int64_t ShAmt) const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasVInstructionsBF16Minimal() const
bool hasVInstructionsF16Minimal() const
unsigned getXLen() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
bool hasVInstructionsBF16() const
unsigned getMaxBuildIntsCost() const
bool hasVInstructions() const
bool isRegisterReservedByUser(Register i) const override
std::optional< unsigned > getRealVLen() const
bool useConstantPoolForLargeInts() const
unsigned getRealMaxVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVInstrInfo * getInstrInfo() const override
bool hasBEXTILike() const
const RISCVTargetLowering * getTargetLowering() const override
bool enablePExtCodeGen() const
bool hasVInstructionsF32() const
bool hasCZEROLike() const
unsigned getELen() const
unsigned getFLen() const
static std::pair< unsigned, unsigned > computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget)
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
ArrayRef< MCPhysReg > getRoundingControlRegisters() const override
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector register.
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that location.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) saturation patterns.
InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array, into the specified DAG.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI)
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' flag.
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
const Constant * getTargetConstantFromLoad(LoadSDNode *LD) const override
This method returns the constant pool value that will be loaded by LD.
const RISCVSubtarget & getSubtarget() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool preferScalarizeSplat(SDNode *N) const override
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is, the target has add instructions which can add a register with the immediate without having to materialize the immediate in a register.
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Return true if it is beneficial to convert a load of a constant to just the constant itself.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the register type for a given MVT, ensuring vectors are treated as a series of gpr sized integers.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fit into the return registers.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of the specified type.
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a scalar operation.
ISD::NodeType getExtendForAtomicRMWArg(unsigned Op) const override
Returns how the platform's atomic rmw operations expect their input argument to be extended (ZERO_EXT...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const override
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
bool hasBitTest(SDValue X, SDValue Y) const override
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in Se...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
bool fallBackToDAGISel(const Instruction &Inst) const override
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset, memcpy, and memmove lowering.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isCtpopFast(EVT VT) const override
Return true if ctpop instruction is fast.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
This method can be implemented by targets that want to expose additional information about sign bits to the DAG Combiner.
MVT getContainerForFixedLengthVector(MVT VT) const
static unsigned getRegClassIDForVecVT(MVT VT)
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with this index.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand flags to them.
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override
Should we transform the IR-optimal check for whether the given truncation down into KeptBits would be truncated back losslessly.
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this functions.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Returns the register with the specified architectural or ABI name.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const override
Control the following reassociation of operands: (op (op x, c1), y) -> (op (op x, y), c1).
bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to use 'custom' lowering, and whose defined values are all legal.
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override
Return the maximum number of "x & (x - 1)" operations that can be done instead of deferring to a custom CTPOP.
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is, the target has icmp instructions which can compare a register against the immediate without having to materialize the immediate in a register.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array, into the specified DAG.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was registered to use 'custom' lowering for that result type.
bool isLegalElementTypeForRVV(EVT ScalarTy) const
bool isLegalFirstFaultLoad(EVT DataType, Align Alignment) const
Return true if a fault-only-first load of the given result type and alignment is legal.
bool isVScaleKnownToBeAPowerOfTwo() const override
Return true only if vscale must be a power of two.
int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Return the number of registers for a given MVT, ensuring vectors are treated as a series of gpr sized integers.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
MachineInstr * EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, const TargetInstrInfo *TII) const override
bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override
Return true if Op can create undef or poison from non-undef & non-poison operands.
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts, adds, and multiplies for this target.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Customize the preferred legalization strategy for certain types.
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const override
Expands target specific indirect branch for the case of JumpTable expansion.
static unsigned getRegClassIDForLMUL(RISCVVType::VLMUL LMul)
unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const override
Return the number of registers for a given MVT, for inline assembly.
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node can be combined with a load/store to form a post-indexed load/store.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's address can be legally represented as a pre-indexed load/store address.
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const
Return true if a stride load store of the given result type and alignment is legal.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (touches memory).
static bool isSpreadMask(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Match a mask which "spreads" the leading elements of a vector evenly across the result.
static RISCVVType::VLMUL getLMUL(MVT VT)
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X, SDValue Y) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
std::optional< APInt > bitcastToAPInt() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
virtual bool isTargetStrictFPOpcode(unsigned Opcode) const
Returns true if a node with the given target-specific opcode has strict floating-point semantics.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
LLVM_ABI Align getReducedAlign(EVT VT, bool UseABI)
In most cases this function returns the ABI alignment for a given type, except for illegal vector typ...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtractVectorElt(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Extract element at Idx from Vec.
LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
LLVM_ABI SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, SDNodeFlags Flags)
Get the (commutative) neutral element for the given opcode, if it exists.
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue getStridedLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &DL, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, bool IsExpanding=false)
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
LLVM_ABI SDValue getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
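A minimal sketch of how this helper is typically used while building nodes during lowering; DAG, DL, LHS and RHS are assumed to be in scope and are not taken from this file, and the i1 result type is only illustrative:
SDValue Cond = DAG.getSetCC(DL, MVT::i1, LHS, RHS, ISD::SETLT); // signed LHS < RHS
SDValue Sel = DAG.getSelect(DL, LHS.getValueType(), Cond, LHS, RHS); // selects the smaller (signed) operand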
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI bool shouldOptForSize() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
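As a sketch of the equivalence stated above (DAG, DL, Val and VT assumed in scope), the helper expands to an XOR with an all-ones constant:
SDValue NotA = DAG.getNOT(DL, Val, VT);
SDValue NotB = DAG.getNode(ISD::XOR, DL, VT, Val, DAG.getAllOnesConstant(DL, VT)); // CSEs to the same node as NotA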
LLVM_ABI SDValue getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, SDValue Mask, SDValue EVL)
Convert a vector-predicated Op, which must be an integer vector, to the vector-type VT,...
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI SDValue getStridedStoreVP(SDValue Chain, const SDLoc &DL, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Stride, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
bool NewNodesMustHaveLegalTypes
When true, additional steps are taken to ensure that getConstant() and similar functions return DAG n...
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
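A small illustrative use, assuming DAG and DL are in scope; the element values are arbitrary:
SDValue Elts[] = {DAG.getConstant(0, DL, MVT::i32), DAG.getConstant(1, DL, MVT::i32),
                  DAG.getConstant(2, DL, MVT::i32), DAG.getConstant(3, DL, MVT::i32)};
SDValue Vec = DAG.getBuildVector(MVT::v4i32, DL, Elts); // <0, 1, 2, 3>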
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
const SelectionDAGTargetInfo & getSelectionDAGInfo() const
LLVM_ABI SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
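A sketch of the usual pattern when splitting a memory access in two; DAG, DL and BasePtr are assumed to be in scope:
SDValue HiPtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(8), DL); // BasePtr + 8 bytes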
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getTypeSize(const SDLoc &DL, EVT VT, TypeSize TS)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
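Combined with GetSplitDestVTs above, this is the usual idiom for halving a vector; a sketch assuming DAG, DL and a vector-typed Vec are in scope:
auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Vec.getValueType());
auto [Lo, Hi] = DAG.SplitVector(Vec, DL, LoVT, HiVT); // low and high halves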
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI std::pair< SDValue, SDValue > SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL)
Split the explicit vector length parameter of a VP operation.
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
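A brief sketch (DAG and DL assumed in scope); for fixed-length types this yields a BUILD_VECTOR, for scalable types a SPLAT_VECTOR:
SDValue One = DAG.getConstant(1, DL, MVT::i32);
SDValue Splat = DAG.getSplat(MVT::v4i32, DL, One); // <1, 1, 1, 1>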
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
LLVM_ABI SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
static LLVM_ABI bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static LLVM_ABI bool isBitRotateMask(ArrayRef< int > Mask, unsigned EltSizeInBits, unsigned MinSubElts, unsigned MaxSubElts, unsigned &NumSubElts, unsigned &RotateAmt)
Checks if the shuffle is a bit rotation of the first operand across multiple subelements,...
static LLVM_ABI bool isSingleSourceMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static LLVM_ABI bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
static LLVM_ABI bool isSplatMask(ArrayRef< int > Mask)
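A worked example of the mask predicates above; the mask values are chosen purely for illustration:
int Rev[] = {3, 2, 1, 0};
bool IsReverse = ShuffleVectorSDNode::isReverseMask(Rev, /*NumSrcElts=*/4); // true
int Dup[] = {2, 2, 2, 2};
bool IsSplat = ShuffleVectorSDNode::isSplatMask(Dup); // true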
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
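A short sketch of the SmallSet operations listed above:
#include "llvm/ADT/SmallSet.h"
llvm::SmallSet<unsigned, 4> Seen;       // stays allocation-free for up to 4 elements
bool Inserted = Seen.insert(7).second;  // true: 7 was not present before
bool Duplicate = Seen.insert(7).second; // false: already in the set
unsigned Present = Seen.count(7);       // 1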
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:854
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
LLVM_ABI std::string lower() const
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
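A hedged example of the Case/Default chaining pattern; Name is an assumed StringRef in scope, and the strings and values are purely illustrative, not taken from this file:
#include "llvm/ADT/StringSwitch.h"
int Kind = llvm::StringSwitch<int>(Name)
               .Case("r", 0)
               .Case("f", 1)
               .Default(-1);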
Information about stack frame layout on the target.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
unsigned getMaxDivRemBitWidthSupported() const
Returns the size in bits of the maximum div/rem the backend supports.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual MVT getVPExplicitVectorLengthTy() const
Returns the type to be used for the EVL/AVL operand of VP nodes: ISD::VP_ADD, ISD::VP_SUB,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Build sdiv by power-of-2 with conditional move instructions. Ref: "Hacker's Delight" by Henry Warren 1...
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, int JTI, SelectionDAG &DAG) const
Expands target specific indirect branch for the case of JumpTable expansion.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
const MCSubtargetInfo * getMCSubtargetInfo() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
TargetOptions Options
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
bool isOSBinFormatCOFF() const
Tests whether the OS uses the COFF binary format.
Definition Triple.h:790
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:261
LLVM_ABI bool isRISCVVectorTupleTy() const
Definition Type.cpp:146
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isTargetExtTy() const
Return true if this is a target extension type.
Definition Type.h:203
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr bool isZero() const
Definition TypeSize.h:153
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ RISCV_VectorCall
Calling convention used for RISC-V V-extension.
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ GRAAL
Used by GraalVM. Two additional registers are reserved.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:780
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SET_FPENV
Sets the current floating-point environment.
@ PARTIAL_REDUCE_SMLA
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ RESET_FPENV
Set floating-point environment to default state.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ FMODF
FMODF - Decomposes the operand into integral and fractional parts, each having the same type and sign...
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition ISDOpcodes.h:991
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:981
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SET_ROUNDING
Set rounding mode.
Definition ISDOpcodes.h:963
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:712
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ READSTEADYCOUNTER
READSTEADYCOUNTER - This corresponds to the readfixedcounter intrinsic.
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:779
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:628
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:688
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition ISDOpcodes.h:958
@ STRICT_FP_TO_FP16
Definition ISDOpcodes.h:994
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
@ STRICT_FP16_TO_FP
Definition ISDOpcodes.h:993
@ GET_FPENV
Gets the current floating-point environment.
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:379
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:633
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:966
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:707
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ SPLAT_VECTOR_PARTS
SPLAT_VECTOR_PARTS(SCALAR1, SCALAR2, ...) - Returns a vector with the scalar values joined together a...
Definition ISDOpcodes.h:678
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ VECTOR_SPLICE
VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as VEC1/VEC2 from CONCAT_VECTOR...
Definition ISDOpcodes.h:654
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:696
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:933
@ VECREDUCE_FMINIMUM
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition ISDOpcodes.h:617
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that are the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ TRUNCATE_SSAT_S
TRUNCATE_[SU]SAT_[SU] - Truncate for saturated operand [SU] located in middle, prefix for SAT means i...
Definition ISDOpcodes.h:859
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
@ TRUNCATE_USAT_U
Definition ISDOpcodes.h:863
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
LLVM_ABI bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
match_combine_or< BinaryOp_match< LHS, RHS, Instruction::Add >, DisjointOr_match< LHS, RHS > > m_AddLike(const LHS &L, const RHS &R)
Match either "add" or "or disjoint".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
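The m_* entries above are the IR-level pattern-match combinators pulled into this file. A minimal sketch of how they compose (standalone illustration; the predicate matchShlOneAdd and the exact pattern are hypothetical, not something this file necessarily matches):

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical predicate: recognize "(X << 1) + Y", also accepting
// "or disjoint" in place of the add, and bind the two operands.
static bool matchShlOneAdd(Value *V, Value *&X, Value *&Y) {
  return match(V, m_AddLike(m_Shl(m_Value(X), m_One()), m_Value(Y)));
}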
unsigned getBrCond(CondCode CC, unsigned SelectOpc=0)
static RISCVVType::VLMUL getLMul(uint64_t TSFlags)
static int getFRMOpNum(const MCInstrDesc &Desc)
static unsigned getSEWOpNum(const MCInstrDesc &Desc)
int getLoadFPImm(APFloat FPImm)
getLoadFPImm - Return a 5-bit binary encoding of the floating-point immediate value.
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
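The RISCVMatInt entries above describe the constant-materialization helpers. A hedged sketch of how such a query might look (standalone illustration; the function name, the threshold of 2, and the include path are assumptions, not taken from this file):

#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/MC/MCSubtargetInfo.h"

using namespace llvm;

// Hypothetical query: is a 64-bit constant cheap to rematerialize with
// integer instructions? The answer is based on the length of the sequence
// RISCVMatInt would emit for it.
static bool isCheapToMaterialize(int64_t Imm, const MCSubtargetInfo &STI) {
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
  return Seq.size() <= 2; // example threshold only
}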
static VLMUL encodeLMUL(unsigned LMUL, bool Fractional)
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
static unsigned encodeSEW(unsigned SEW)
static constexpr unsigned FPMASK_Negative_Zero
static constexpr unsigned FPMASK_Positive_Subnormal
static constexpr unsigned FPMASK_Positive_Normal
static constexpr unsigned FPMASK_Negative_Subnormal
static constexpr unsigned FPMASK_Negative_Normal
static constexpr unsigned FPMASK_Positive_Infinity
static constexpr unsigned FPMASK_Negative_Infinity
static constexpr unsigned FPMASK_Quiet_NaN
ArrayRef< MCPhysReg > getArgGPRs(const RISCVABI::ABI ABI)
static constexpr unsigned FPMASK_Signaling_NaN
static constexpr unsigned FPMASK_Positive_Zero
static constexpr unsigned RVVBitsPerBlock
static constexpr unsigned RVVBytesPerBlock
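RVVBitsPerBlock and RVVBytesPerBlock describe the size of one RVV register block. A compile-time arithmetic sketch of what they imply for scalable containers (the constants are restated locally under the assumption that they equal the upstream values, rather than included from the backend headers):

namespace {
constexpr unsigned RVVBitsPerBlock = 64; // assumed == RISCV::RVVBitsPerBlock
constexpr unsigned RVVBytesPerBlock = RVVBitsPerBlock / 8;

// Minimum element count of an LMUL=1 scalable container for a given SEW.
constexpr unsigned minElementsPerBlock(unsigned SEW) {
  return RVVBitsPerBlock / SEW;
}

static_assert(RVVBytesPerBlock == 8, "one block is 8 bytes");
static_assert(minElementsPerBlock(32) == 2,
              "<vscale x 2 x i32> is the SEW=32, LMUL=1 container");
} // namespace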
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
auto m_ExactSr(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS > m_Srl(const LHS &L, const RHS &R)
auto m_SpecificVT(EVT RefVT, const Pattern &P)
Match a specific ValueType.
TernaryOpc_match< LHS, RHS, IDX > m_InsertSubvector(const LHS &Base, const RHS &Sub, const IDX &Idx)
Or< Preds... > m_AnyOf(const Preds &...preds)
auto m_Node(unsigned Opcode, const OpndPreds &...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
ConstantInt_match m_ConstInt()
Match any integer constant or a splat of an integer constant.
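sd_match and the SD-level m_* overloads above mirror the IR matchers but operate on SelectionDAG nodes. A hedged sketch of the shape of such a check (standalone illustration; the predicate name and pattern are hypothetical, and SDPatternMatch's value-binding m_Value overload is assumed to be available as in recent LLVM):

#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Hypothetical DAG-level check: is N a logical shift right, with one use,
// of some value by an integer constant? Passing the DAG lets sd_match
// consult it when classifying constants and splats.
static bool isSingleUseSrlByConstant(SDNode *N, const SelectionDAG &DAG,
                                     SDValue &Src) {
  using namespace SDPatternMatch;
  return sd_match(N, &DAG, m_OneUse(m_Srl(m_Value(Src), m_ConstInt())));
}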
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
@ CMOV
X86 conditional moves.
initializer< Ty > init(const Ty &Val)
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
@ Offset
Definition DWP.cpp:532
bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const MachineMemOperand::Flags MONontemporalBit1
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2484
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
MCCodeEmitter * createRISCVMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
static const MachineMemOperand::Flags MONontemporalBit0
bool RISCVCCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
RISCVCCAssignFn - This target-specific function extends the default CCValAssign with additional infor...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew modulo Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1593
LLVM_ABI void reportFatalInternalError(Error Err)
Report a fatal error that indicates a bug in LLVM.
Definition Error.cpp:177
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:1980
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
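Many of the llvm:: math helpers listed here (isPowerOf2_64, Log2_64, isInt/isUInt, SignExtend64, maskTrailingOnes, divideCeil, alignDown, ...) are small bit-manipulation utilities from Support/MathExtras.h. A self-contained demonstration of their behaviour (illustration only):

#include "llvm/Support/MathExtras.h"
#include <cassert>

using namespace llvm;

int main() {
  // Power-of-two and log helpers.
  assert(isPowerOf2_64(64) && Log2_64(64) == 6);
  assert(PowerOf2Ceil(33) == 64);

  // Bit-width checks of the kind used for immediate operands:
  // isInt<12>() corresponds to a signed 12-bit field.
  assert(isInt<12>(2047) && !isInt<12>(2048));
  assert(isUInt<5>(31) && !isUInt<5>(32));

  // Sign extension and trailing-ones masks.
  assert(SignExtend64<12>(0xFFF) == -1);
  assert(maskTrailingOnes<uint64_t>(4) == 0xF);

  // Rounding helpers.
  assert(divideCeil(10, 4) == 3);
  assert(alignDown(13, 4) == 12);
  return 0;
}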
bool CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
int isShifted359(T Value, int &Shift)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
CombineLevel
Definition DAGCombine.h:15
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
@ Xor
Bitwise or logical XOR of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
RoundingMode
Rounding mode.
@ TowardZero
roundTowardZero.
@ NearestTiesToEven
roundTiesToEven.
@ TowardPositive
roundTowardPositive.
@ NearestTiesToAway
roundTiesToAway.
@ TowardNegative
roundTowardNegative.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1973
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:198
LLVM_ABI bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
uint64_t getScalarStoreSize() const
Definition ValueTypes.h:402
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:359
unsigned getRISCVVectorTupleNumFields() const
Given a RISCV vector tuple type, return the num_fields.
Definition ValueTypes.h:364
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
Definition ValueTypes.h:430
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
bool isRISCVVectorTuple() const
Return true if this is a RISC-V vector tuple value type.
Definition ValueTypes.h:179
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isFixedLengthVector() const
Definition ValueTypes.h:181
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:419
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:292
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:174
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:308
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
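The EVT members above form the value-type query API used throughout lowering. A small standalone sketch that builds a few vector EVTs and exercises some of these queries (illustration only; requires an LLVMContext):

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

using namespace llvm;

int main() {
  LLVMContext Ctx;

  // A fixed-length vector of four 32-bit integers and its scalable
  // counterpart <vscale x 4 x i32>.
  EVT FixedVT = EVT::getVectorVT(Ctx, MVT::i32, 4);
  EVT ScalableVT = EVT::getVectorVT(Ctx, MVT::i32, 4, /*IsScalable=*/true);

  assert(FixedVT.isFixedLengthVector() && !FixedVT.isScalableVector());
  assert(ScalableVT.isScalableVector());
  assert(FixedVT.getVectorElementType() == MVT::i32);
  assert(FixedVT.getVectorNumElements() == 4);
  assert(FixedVT.getSizeInBits() == 128);

  // Converting the element type to an equally sized integer maps
  // v4f32 -> v4i32.
  EVT FloatVT = EVT::getVectorVT(Ctx, MVT::f32, 4);
  assert(FloatVT.changeVectorElementTypeToInteger() == FixedVT);
  return 0;
}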
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
static LLVM_ABI KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for ashr(LHS, RHS).
static LLVM_ABI KnownBits urem(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits for urem(LHS, RHS).
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:274
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
Definition KnownBits.h:161
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
static LLVM_ABI KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS, bool ShAmtNonZero=false, bool Exact=false)
Compute known bits for lshr(LHS, RHS).
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:296
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
Definition KnownBits.h:347
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
static LLVM_ABI KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS, bool Exact=false)
Compute known bits for udiv(LHS, RHS).
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:280
static LLVM_ABI KnownBits shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW=false, bool NSW=false, bool ShAmtNonZero=false)
Compute known bits for shl(LHS, RHS).
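The KnownBits entries above are the bit-tracking queries used when computing known bits for custom nodes. A small standalone demonstration of a few of them (illustration only; the example values are arbitrary):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>

using namespace llvm;

int main() {
  // A 16-bit quantity whose low four bits are known to be zero, with all
  // other bits unknown.
  KnownBits Known(16);
  Known.Zero.setLowBits(4);

  assert(Known.getBitWidth() == 16);
  assert(Known.countMinTrailingZeros() == 4);  // low four bits definitely 0
  assert(Known.countMaxTrailingZeros() == 16); // the value could still be 0

  // Shifting left by a constant amount adds known-zero low bits.
  KnownBits Shifted =
      KnownBits::shl(Known, KnownBits::makeConstant(APInt(16, 2)));
  assert(Shifted.countMinTrailingZeros() >= 6);

  // Zero extension keeps the tracked bits and marks the new high bits zero.
  KnownBits Wide = Known.zext(32);
  assert(Wide.getBitWidth() == 32 && Wide.countMinLeadingZeros() >= 16);
  return 0;
}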
Matching combinators.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Register getFrameRegister(const MachineFunction &MF) const override
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI bool recursivelyDeleteUnusedNodes(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...