LLVM 23.0.0git
RISCVISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
24#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
34 "riscv-use-rematerializable-movimm", cl::Hidden,
35 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
36 "constant materialization"),
37 cl::init(false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
43 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
44
45 bool MadeChange = false;
46 while (Position != CurDAG->allnodes_begin()) {
47 SDNode *N = &*--Position;
48 if (N->use_empty())
49 continue;
50
51 SDValue Result;
52 switch (N->getOpcode()) {
53 case ISD::SPLAT_VECTOR: {
54 if (Subtarget->hasStdExtP())
55 break;
56 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
57 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
58 MVT VT = N->getSimpleValueType(0);
59 unsigned Opc =
60 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
61 SDLoc DL(N);
62 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
63 SDValue Src = N->getOperand(0);
64 if (VT.isInteger())
65 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
66 N->getOperand(0));
67 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
68 break;
69 }
70 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
71 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
72 // load. Done after lowering and combining so that we have a chance to
73 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
74 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
75 MVT VT = N->getSimpleValueType(0);
76 SDValue Passthru = N->getOperand(0);
77 SDValue Lo = N->getOperand(1);
78 SDValue Hi = N->getOperand(2);
79 SDValue VL = N->getOperand(3);
80 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
81 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
82 "Unexpected VTs!");
83 MachineFunction &MF = CurDAG->getMachineFunction();
84 SDLoc DL(N);
85
86 // Create temporary stack for each expanding node.
87 SDValue StackSlot =
88 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
89 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
91
92 SDValue Chain = CurDAG->getEntryNode();
93 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
94
95 SDValue OffsetSlot =
96 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
97 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
98 Align(8));
99
100 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
101
102 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
103 SDValue IntID =
104 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
105 SDValue Ops[] = {Chain,
106 IntID,
107 Passthru,
108 StackSlot,
109 CurDAG->getRegister(RISCV::X0, MVT::i64),
110 VL};
111
112 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
113 MVT::i64, MPI, Align(8),
115 break;
116 }
117 case ISD::FP_EXTEND: {
118 // We only have vector patterns for riscv_fpextend_vl in isel.
119 SDLoc DL(N);
120 MVT VT = N->getSimpleValueType(0);
121 if (!VT.isVector())
122 break;
123 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
124 SDValue TrueMask = CurDAG->getNode(
125 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
126 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
127 TrueMask, VLMAX);
128 break;
129 }
130 }
131
132 if (Result) {
133 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
134 LLVM_DEBUG(N->dump(CurDAG));
135 LLVM_DEBUG(dbgs() << "\nNew: ");
136 LLVM_DEBUG(Result->dump(CurDAG));
137 LLVM_DEBUG(dbgs() << "\n");
138
139 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
140 MadeChange = true;
141 }
142 }
143
144 if (MadeChange)
145 CurDAG->RemoveDeadNodes();
146}
147
149 HandleSDNode Dummy(CurDAG->getRoot());
150 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
151
152 bool MadeChange = false;
153 while (Position != CurDAG->allnodes_begin()) {
154 SDNode *N = &*--Position;
155 // Skip dead nodes and any non-machine opcodes.
156 if (N->use_empty() || !N->isMachineOpcode())
157 continue;
158
159 MadeChange |= doPeepholeSExtW(N);
160
161 // FIXME: This is here only because the VMerge transform doesn't
162 // know how to handle masked true inputs. Once that has been moved
163 // to post-ISEL, this can be deleted as well.
164 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
165 }
166
167 CurDAG->setRoot(Dummy.getValue());
168
169 // After we're done with everything else, convert IMPLICIT_DEF
170 // passthru operands to NoRegister. This is required to workaround
171 // an optimization deficiency in MachineCSE. This really should
172 // be merged back into each of the patterns (i.e. there's no good
173 // reason not to go directly to NoReg), but is being done this way
174 // to allow easy backporting.
175 MadeChange |= doPeepholeNoRegPassThru();
176
177 if (MadeChange)
178 CurDAG->RemoveDeadNodes();
179}
180
// Emit the machine nodes for a RISCVMatInt constant-materialization sequence,
// feeding each emitted instruction's result into the next as its source
// register. The chain starts from X0 and the value of the final instruction
// is returned.
// NOTE(review): the second signature line (declaring the `Seq` parameter and
// the opening brace) and the switch case labels between the Imm case and the
// remaining operand-kind cases are not visible in this excerpt of the
// listing — confirm against the upstream file before relying on the exact
// case structure.
181static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
183 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
184 for (const RISCVMatInt::Inst &Inst : Seq) {
185 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
186 SDNode *Result = nullptr;
  // Dispatch on the operand shape this materialization step requires.
187 switch (Inst.getOpndKind()) {
  // Single immediate operand.
188 case RISCVMatInt::Imm:
189 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
190 break;
  // Running value plus an explicit X0 operand (case label not visible here).
192 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
193 CurDAG->getRegister(RISCV::X0, VT));
194 break;
  // Running value used for both register operands (case label not visible).
196 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
197 break;
  // Running value plus the instruction's immediate (case label not visible).
199 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
200 break;
201 }
202
203 // Only the first instruction has X0 as its source.
204 SrcReg = SDValue(Result, 0);
205 }
206
207 return SrcReg;
208}
209
// Materialize the constant Imm into a register of type VT, choosing between:
// a rematerializable PseudoMovImm (opt-in, 2-instruction sequences only), a
// two-register (SLLI + ADD/ADD_UW) split when it beats the plain sequence, or
// the straight RISCVMatInt sequence via selectImmSeq.
// NOTE(review): the declarations of `Seq` (the full materialization sequence)
// and `SeqLo` (the candidate low-part sequence) are not visible in this
// excerpt of the listing.
210static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
211 int64_t Imm, const RISCVSubtarget &Subtarget) {
213
214 // Use a rematerializable pseudo instruction for short sequences if enabled.
215 if (Seq.size() == 2 && UsePseudoMovImm)
216 return SDValue(
217 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
218 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
219 0);
220
221 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
222 // worst an LUI+ADDIW. This will require an extra register, but avoids a
223 // constant pool.
224 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
225 // low and high 32 bits are the same and bit 31 and 63 are set.
226 if (Seq.size() > 3) {
227 unsigned ShiftAmt, AddOpc;
229 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
  // Only take the split form when SeqLo plus the extra SLLI+ADD is strictly
  // shorter than the original sequence.
230 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
231 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
232
233 SDValue SLLI = SDValue(
234 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
235 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
236 0);
237 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
238 }
239 }
240
241 // Otherwise, use the original sequence.
242 return selectImmSeq(CurDAG, DL, VT, Seq);
243}
244
246 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
247 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
248 bool IsLoad, MVT *IndexVT) {
249 SDValue Chain = Node->getOperand(0);
250
251 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
252
253 if (IsStridedOrIndexed) {
254 Operands.push_back(Node->getOperand(CurOp++)); // Index.
255 if (IndexVT)
256 *IndexVT = Operands.back()->getSimpleValueType(0);
257 }
258
259 if (IsMasked) {
260 SDValue Mask = Node->getOperand(CurOp++);
261 Operands.push_back(Mask);
262 }
263 SDValue VL;
264 selectVLOp(Node->getOperand(CurOp++), VL);
265 Operands.push_back(VL);
266
267 MVT XLenVT = Subtarget->getXLenVT();
268 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
269 Operands.push_back(SEWOp);
270
271 // At the IR layer, all the masked load intrinsics have policy operands,
272 // none of the others do. All have passthru operands. For our pseudos,
273 // all loads have policy operands.
274 if (IsLoad) {
276 if (IsMasked)
277 Policy = Node->getConstantOperandVal(CurOp++);
278 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
279 Operands.push_back(PolicyOp);
280 }
281
282 Operands.push_back(Chain); // Chain.
283}
284
// Select a (possibly masked, possibly strided) vlseg<NF> segment-load
// intrinsic into the matching pseudo instruction. The pseudo yields an
// Untyped tuple (result 0) and a chain (result 1) which replace the
// intrinsic's two results.
// NOTE(review): the declarations of `LMUL` and the `Operands` vector are not
// visible in this excerpt of the listing.
285void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
286 bool IsStrided) {
287 SDLoc DL(Node);
288 MVT VT = Node->getSimpleValueType(0);
// Log2SEW is encoded as the intrinsic's trailing constant operand.
289 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
291
292 unsigned CurOp = 2;
294
// First vector operand — presumably the passthru/merge value; confirm
// against the intrinsic definition.
295 Operands.push_back(Node->getOperand(CurOp++));
296
297 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
298 Operands, /*IsLoad=*/true);
299
300 const RISCV::VLSEGPseudo *P =
301 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
302 static_cast<unsigned>(LMUL));
303 MachineSDNode *Load =
304 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
305
// Carry the original memory operand over to the selected pseudo.
306 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
307
308 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
309 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
310 CurDAG->RemoveDeadNode(Node);
311}
312
314 bool IsMasked) {
315 SDLoc DL(Node);
316 MVT VT = Node->getSimpleValueType(0);
317 MVT XLenVT = Subtarget->getXLenVT();
318 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
320
321 unsigned CurOp = 2;
323
324 Operands.push_back(Node->getOperand(CurOp++));
325
326 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
327 /*IsStridedOrIndexed*/ false, Operands,
328 /*IsLoad=*/true);
329
330 const RISCV::VLSEGPseudo *P =
331 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
332 Log2SEW, static_cast<unsigned>(LMUL));
333 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
334 XLenVT, MVT::Other, Operands);
335
336 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
337
338 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
339 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
340 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
341 CurDAG->RemoveDeadNode(Node);
342}
343
// Select an indexed (ordered or unordered) vlxseg<NF> segment-load intrinsic
// into the matching pseudo. The index operand's EEW/LMUL are derived from its
// vector type; EEW=64 index values are diagnosed as unsupported on RV32.
// NOTE(review): the declarations of `LMUL`, the `Operands` vector, and
// `IndexLMUL` are not visible in this excerpt of the listing.
344void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
345 bool IsOrdered) {
346 SDLoc DL(Node);
347 MVT VT = Node->getSimpleValueType(0);
// Log2SEW is encoded as the intrinsic's trailing constant operand.
348 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
350
351 unsigned CurOp = 2;
353
// First vector operand — presumably the passthru/merge value; confirm
// against the intrinsic definition.
354 Operands.push_back(Node->getOperand(CurOp++));
355
356 MVT IndexVT;
357 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
358 /*IsStridedOrIndexed*/ true, Operands,
359 /*IsLoad=*/true, &IndexVT);
360
// Debug-only sanity check: the index vector must have the same (minimum)
// element count as the data type implied by SEW and LMUL.
361#ifndef NDEBUG
362 // Number of element = RVVBitsPerBlock * LMUL / SEW
363 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
364 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
365 if (DecodedLMUL.second)
366 ContainedTyNumElts /= DecodedLMUL.first;
367 else
368 ContainedTyNumElts *= DecodedLMUL.first;
369 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
370 "Element count mismatch");
371#endif
372
374 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
375 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
376 reportFatalUsageError("The V extension does not support EEW=64 for index "
377 "values when XLEN=32");
378 }
379 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
380 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
381 static_cast<unsigned>(IndexLMUL));
382 MachineSDNode *Load =
383 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
384
// Carry the original memory operand over to the selected pseudo.
385 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
386
387 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
388 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
389 CurDAG->RemoveDeadNode(Node);
390}
391
// Select a (possibly masked, possibly strided) vsseg<NF> segment-store
// intrinsic into the matching pseudo. Stores produce only a chain, so the
// whole node is swapped out via ReplaceNode.
// NOTE(review): the declarations of `LMUL` and the `Operands` vector are not
// visible in this excerpt of the listing.
392void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
393 bool IsStrided) {
394 SDLoc DL(Node);
// Type of the stored segment value (operand 2).
395 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
// Log2SEW is encoded as the intrinsic's trailing constant operand.
396 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
398
399 unsigned CurOp = 2;
401
// The value to store is pushed first, then base/stride/mask/VL/SEW.
402 Operands.push_back(Node->getOperand(CurOp++));
403
404 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
405 Operands);
406
407 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
408 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
409 MachineSDNode *Store =
410 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
411
// Carry the original memory operand over to the selected pseudo.
412 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
413
414 ReplaceNode(Node, Store);
415}
416
// Select an indexed (ordered or unordered) vsxseg<NF> segment-store intrinsic
// into the matching pseudo. Mirrors selectVLXSEG: index EEW/LMUL come from
// the index operand's type, and EEW=64 indices are rejected on RV32.
// NOTE(review): the declarations of `LMUL`, the `Operands` vector, and
// `IndexLMUL` are not visible in this excerpt of the listing.
417void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
418 bool IsOrdered) {
419 SDLoc DL(Node);
// Type of the stored segment value (operand 2).
420 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
// Log2SEW is encoded as the intrinsic's trailing constant operand.
421 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
423
424 unsigned CurOp = 2;
426
// The value to store is pushed first, then base/index/mask/VL/SEW.
427 Operands.push_back(Node->getOperand(CurOp++));
428
429 MVT IndexVT;
430 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
431 /*IsStridedOrIndexed*/ true, Operands,
432 /*IsLoad=*/false, &IndexVT);
433
// Debug-only sanity check: the index vector must have the same (minimum)
// element count as the data type implied by SEW and LMUL.
434#ifndef NDEBUG
435 // Number of element = RVVBitsPerBlock * LMUL / SEW
436 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
437 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
438 if (DecodedLMUL.second)
439 ContainedTyNumElts /= DecodedLMUL.first;
440 else
441 ContainedTyNumElts *= DecodedLMUL.first;
442 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
443 "Element count mismatch");
444#endif
445
447 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
448 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449 reportFatalUsageError("The V extension does not support EEW=64 for index "
450 "values when XLEN=32");
451 }
452 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
453 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454 static_cast<unsigned>(IndexLMUL));
455 MachineSDNode *Store =
456 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
457
// Carry the original memory operand over to the selected pseudo.
458 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
459
460 ReplaceNode(Node, Store);
461}
462
464 if (!Subtarget->hasVInstructions())
465 return;
466
467 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
468
469 SDLoc DL(Node);
470 MVT XLenVT = Subtarget->getXLenVT();
471
472 unsigned IntNo = Node->getConstantOperandVal(0);
473
474 assert((IntNo == Intrinsic::riscv_vsetvli ||
475 IntNo == Intrinsic::riscv_vsetvlimax) &&
476 "Unexpected vsetvli intrinsic");
477
478 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
479 unsigned Offset = (VLMax ? 1 : 2);
480
481 assert(Node->getNumOperands() == Offset + 2 &&
482 "Unexpected number of operands");
483
484 unsigned SEW =
485 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
486 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
487 Node->getConstantOperandVal(Offset + 1) & 0x7);
488
489 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
490 /*MaskAgnostic*/ true);
491 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
492
493 SDValue VLOperand;
494 unsigned Opcode = RISCV::PseudoVSETVLI;
495 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
496 if (auto VLEN = Subtarget->getRealVLen())
497 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
498 VLMax = true;
499 }
500 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
501 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
502 Opcode = RISCV::PseudoVSETVLIX0;
503 } else {
504 VLOperand = Node->getOperand(1);
505
506 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
507 uint64_t AVL = C->getZExtValue();
508 if (isUInt<5>(AVL)) {
509 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
510 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
511 XLenVT, VLImm, VTypeIOp));
512 return;
513 }
514 }
515 }
516
518 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
519}
520
522 if (!Subtarget->hasVendorXSfmmbase())
523 return;
524
525 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
526
527 SDLoc DL(Node);
528 MVT XLenVT = Subtarget->getXLenVT();
529
530 unsigned IntNo = Node->getConstantOperandVal(0);
531
532 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
533 IntNo == Intrinsic::riscv_sf_vsettm ||
534 IntNo == Intrinsic::riscv_sf_vsettk) &&
535 "Unexpected XSfmm vset intrinsic");
536
537 unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
538 unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
539 unsigned PseudoOpCode =
540 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
541 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
542 : RISCV::PseudoSF_VSETTK;
543
544 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
545 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0);
546 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
547
548 ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
549 Node->getOperand(1), VTypeIOp));
550 } else {
551 SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT);
552 SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT);
554 CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
555 Node->getOperand(1), Log2SEW, TWiden));
556 }
557}
558
560 MVT VT = Node->getSimpleValueType(0);
561 unsigned Opcode = Node->getOpcode();
562 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
563 "Unexpected opcode");
564 SDLoc DL(Node);
565
566 // For operations of the form (x << C1) op C2, check if we can use
567 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
568 SDValue N0 = Node->getOperand(0);
569 SDValue N1 = Node->getOperand(1);
570
572 if (!Cst)
573 return false;
574
575 int64_t Val = Cst->getSExtValue();
576
577 // Check if immediate can already use ANDI/ORI/XORI.
578 if (isInt<12>(Val))
579 return false;
580
581 SDValue Shift = N0;
582
583 // If Val is simm32 and we have a sext_inreg from i32, then the binop
584 // produces at least 33 sign bits. We can peek through the sext_inreg and use
585 // a SLLIW at the end.
586 bool SignExt = false;
587 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
588 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
589 SignExt = true;
590 Shift = N0.getOperand(0);
591 }
592
593 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
594 return false;
595
597 if (!ShlCst)
598 return false;
599
600 uint64_t ShAmt = ShlCst->getZExtValue();
601
602 // Make sure that we don't change the operation by removing bits.
603 // This only matters for OR and XOR, AND is unaffected.
604 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
605 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
606 return false;
607
608 int64_t ShiftedVal = Val >> ShAmt;
609 if (!isInt<12>(ShiftedVal))
610 return false;
611
612 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
613 if (SignExt && ShAmt >= 32)
614 return false;
615
616 // Ok, we can reorder to get a smaller immediate.
617 unsigned BinOpc;
618 switch (Opcode) {
619 default: llvm_unreachable("Unexpected opcode");
620 case ISD::AND: BinOpc = RISCV::ANDI; break;
621 case ISD::OR: BinOpc = RISCV::ORI; break;
622 case ISD::XOR: BinOpc = RISCV::XORI; break;
623 }
624
625 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
626
627 SDNode *BinOp = CurDAG->getMachineNode(
628 BinOpc, DL, VT, Shift.getOperand(0),
629 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
630 SDNode *SLLI =
631 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
632 CurDAG->getTargetConstant(ShAmt, DL, VT));
633 ReplaceNode(Node, SLLI);
634 return true;
635}
636
638 unsigned Opc;
639
640 if (Subtarget->hasVendorXTHeadBb())
641 Opc = RISCV::TH_EXT;
642 else if (Subtarget->hasVendorXAndesPerf())
643 Opc = RISCV::NDS_BFOS;
644 else if (Subtarget->hasVendorXqcibm())
645 Opc = RISCV::QC_EXT;
646 else
647 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
648 return false;
649
650 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
651 if (!N1C)
652 return false;
653
654 SDValue N0 = Node->getOperand(0);
655 if (!N0.hasOneUse())
656 return false;
657
658 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
659 const SDLoc &DL, MVT VT) {
660 if (Opc == RISCV::QC_EXT) {
661 // QC.EXT X, width, shamt
662 // shamt is the same as Lsb
663 // width is the number of bits to extract from the Lsb
664 Msb = Msb - Lsb + 1;
665 }
666 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
667 CurDAG->getTargetConstant(Msb, DL, VT),
668 CurDAG->getTargetConstant(Lsb, DL, VT));
669 };
670
671 SDLoc DL(Node);
672 MVT VT = Node->getSimpleValueType(0);
673 const unsigned RightShAmt = N1C->getZExtValue();
674
675 // Transform (sra (shl X, C1) C2) with C1 < C2
676 // -> (SignedBitfieldExtract X, msb, lsb)
677 if (N0.getOpcode() == ISD::SHL) {
678 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
679 if (!N01C)
680 return false;
681
682 const unsigned LeftShAmt = N01C->getZExtValue();
683 // Make sure that this is a bitfield extraction (i.e., the shift-right
684 // amount can not be less than the left-shift).
685 if (LeftShAmt > RightShAmt)
686 return false;
687
688 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
689 const unsigned Msb = MsbPlusOne - 1;
690 const unsigned Lsb = RightShAmt - LeftShAmt;
691
692 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
693 ReplaceNode(Node, Sbe);
694 return true;
695 }
696
697 // Transform (sra (sext_inreg X, _), C) ->
698 // (SignedBitfieldExtract X, msb, lsb)
699 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
700 unsigned ExtSize =
701 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
702
703 // ExtSize of 32 should use sraiw via tablegen pattern.
704 if (ExtSize == 32)
705 return false;
706
707 const unsigned Msb = ExtSize - 1;
708 // If the shift-right amount is greater than Msb, it means that extracts
709 // the X[Msb] bit and sign-extend it.
710 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
711
712 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
713 ReplaceNode(Node, Sbe);
714 return true;
715 }
716
717 return false;
718}
719
721 // Only supported with XAndesPerf at the moment.
722 if (!Subtarget->hasVendorXAndesPerf())
723 return false;
724
725 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
726 if (!N1C)
727 return false;
728
729 SDValue N0 = Node->getOperand(0);
730 if (!N0.hasOneUse())
731 return false;
732
733 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
734 const SDLoc &DL, MVT VT) {
735 unsigned Opc = RISCV::NDS_BFOS;
736 // If the Lsb is equal to the Msb, then the Lsb should be 0.
737 if (Lsb == Msb)
738 Lsb = 0;
739 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
740 CurDAG->getTargetConstant(Lsb, DL, VT),
741 CurDAG->getTargetConstant(Msb, DL, VT));
742 };
743
744 SDLoc DL(Node);
745 MVT VT = Node->getSimpleValueType(0);
746 const unsigned RightShAmt = N1C->getZExtValue();
747
748 // Transform (sra (shl X, C1) C2) with C1 > C2
749 // -> (NDS.BFOS X, lsb, msb)
750 if (N0.getOpcode() == ISD::SHL) {
751 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
752 if (!N01C)
753 return false;
754
755 const unsigned LeftShAmt = N01C->getZExtValue();
756 // Make sure that this is a bitfield insertion (i.e., the shift-right
757 // amount should be less than the left-shift).
758 if (LeftShAmt <= RightShAmt)
759 return false;
760
761 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
762 const unsigned Msb = MsbPlusOne - 1;
763 const unsigned Lsb = LeftShAmt - RightShAmt;
764
765 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
766 ReplaceNode(Node, Sbi);
767 return true;
768 }
769
770 return false;
771}
772
774 const SDLoc &DL, MVT VT,
775 SDValue X, unsigned Msb,
776 unsigned Lsb) {
777 unsigned Opc;
778
779 if (Subtarget->hasVendorXTHeadBb()) {
780 Opc = RISCV::TH_EXTU;
781 } else if (Subtarget->hasVendorXAndesPerf()) {
782 Opc = RISCV::NDS_BFOZ;
783 } else if (Subtarget->hasVendorXqcibm()) {
784 Opc = RISCV::QC_EXTU;
785 // QC.EXTU X, width, shamt
786 // shamt is the same as Lsb
787 // width is the number of bits to extract from the Lsb
788 Msb = Msb - Lsb + 1;
789 } else {
790 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
791 return false;
792 }
793
794 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
795 CurDAG->getTargetConstant(Msb, DL, VT),
796 CurDAG->getTargetConstant(Lsb, DL, VT));
797 ReplaceNode(Node, Ube);
798 return true;
799}
800
802 const SDLoc &DL, MVT VT,
803 SDValue X, unsigned Msb,
804 unsigned Lsb) {
805 // Only supported with XAndesPerf at the moment.
806 if (!Subtarget->hasVendorXAndesPerf())
807 return false;
808
809 unsigned Opc = RISCV::NDS_BFOZ;
810
811 // If the Lsb is equal to the Msb, then the Lsb should be 0.
812 if (Lsb == Msb)
813 Lsb = 0;
814 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
815 CurDAG->getTargetConstant(Lsb, DL, VT),
816 CurDAG->getTargetConstant(Msb, DL, VT));
817 ReplaceNode(Node, Ubi);
818 return true;
819}
820
822 // Target does not support indexed loads.
823 if (!Subtarget->hasVendorXTHeadMemIdx())
824 return false;
825
828 if (AM == ISD::UNINDEXED)
829 return false;
830
832 if (!C)
833 return false;
834
835 EVT LoadVT = Ld->getMemoryVT();
836 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
837 "Unexpected addressing mode");
838 bool IsPre = AM == ISD::PRE_INC;
839 bool IsPost = AM == ISD::POST_INC;
840 int64_t Offset = C->getSExtValue();
841
842 // The constants that can be encoded in the THeadMemIdx instructions
843 // are of the form (sign_extend(imm5) << imm2).
844 unsigned Shift;
845 for (Shift = 0; Shift < 4; Shift++)
846 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
847 break;
848
849 // Constant cannot be encoded.
850 if (Shift == 4)
851 return false;
852
853 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
854 unsigned Opcode;
855 if (LoadVT == MVT::i8 && IsPre)
856 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
857 else if (LoadVT == MVT::i8 && IsPost)
858 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
859 else if (LoadVT == MVT::i16 && IsPre)
860 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
861 else if (LoadVT == MVT::i16 && IsPost)
862 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
863 else if (LoadVT == MVT::i32 && IsPre)
864 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
865 else if (LoadVT == MVT::i32 && IsPost)
866 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
867 else if (LoadVT == MVT::i64 && IsPre)
868 Opcode = RISCV::TH_LDIB;
869 else if (LoadVT == MVT::i64 && IsPost)
870 Opcode = RISCV::TH_LDIA;
871 else
872 return false;
873
874 EVT Ty = Ld->getOffset().getValueType();
875 SDValue Ops[] = {
876 Ld->getBasePtr(),
877 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
878 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
879 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
880 Ld->getValueType(1), MVT::Other, Ops);
881
882 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
883 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
884
885 ReplaceNode(Node, New);
886
887 return true;
888}
889
890static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT,
891 SDValue Lo, SDValue Hi) {
892 SDValue Ops[] = {
893 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
894 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
895 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
896
897 return SDValue(
898 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops), 0);
899}
900
901// Helper to extract Lo and Hi values from a GPR pair.
// Inverse of buildGPRPair: the even subregister yields Lo and the odd
// subregister yields Hi, each as a 32-bit value.
// NOTE(review): the declarator line (function name and parameter list) is
// missing from this excerpt; per the comment above this is extractGPRPair
// taking the DAG, a debug location, and the Pair value.
902static std::pair<SDValue, SDValue>
904 SDValue Lo =
905 CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, MVT::i32, Pair);
906 SDValue Hi =
907 CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, MVT::i32, Pair);
908 return {Lo, Hi};
909}
910
911// Try to match WMACC pattern: ADDD where one operand pair comes from a
912// widening multiply (both results of UMUL_LOHI, SMUL_LOHI, or WMULSU).
914 assert(Node->getOpcode() == RISCVISD::ADDD && "Expected ADDD");
915
916 SDValue Op0Lo = Node->getOperand(0);
917 SDValue Op0Hi = Node->getOperand(1);
918 SDValue Op1Lo = Node->getOperand(2);
919 SDValue Op1Hi = Node->getOperand(3);
920
921 auto IsSupportedMulWithOneUse = [](SDValue Lo, SDValue Hi) {
922 unsigned Opc = Lo.getOpcode();
923 if (Opc != ISD::UMUL_LOHI && Opc != ISD::SMUL_LOHI &&
924 Opc != RISCVISD::WMULSU)
925 return false;
926 return Lo.getNode() == Hi.getNode() && Lo.getResNo() == 0 &&
927 Hi.getResNo() == 1 && Lo.hasOneUse() && Hi.hasOneUse();
928 };
929
930 SDNode *MulNode = nullptr;
931 SDValue AddLo, AddHi;
932
933 // Check if first operand pair is a supported multiply with single use.
934 if (IsSupportedMulWithOneUse(Op0Lo, Op0Hi)) {
935 MulNode = Op0Lo.getNode();
936 AddLo = Op1Lo;
937 AddHi = Op1Hi;
938 }
939 // ADDD is commutative. Check if second operand pair is a supported multiply
940 // with single use.
941 else if (IsSupportedMulWithOneUse(Op1Lo, Op1Hi)) {
942 MulNode = Op1Lo.getNode();
943 AddLo = Op0Lo;
944 AddHi = Op0Hi;
945 } else {
946 return false;
947 }
948
949 unsigned Opc;
950 switch (MulNode->getOpcode()) {
951 default:
952 llvm_unreachable("Unexpected multiply opcode");
953 case ISD::UMUL_LOHI:
954 Opc = RISCV::WMACCU;
955 break;
956 case ISD::SMUL_LOHI:
957 Opc = RISCV::WMACC;
958 break;
959 case RISCVISD::WMULSU:
960 Opc = RISCV::WMACCSU;
961 break;
962 }
963
964 SDValue Acc = buildGPRPair(CurDAG, DL, MVT::Untyped, AddLo, AddHi);
965
966 // WMACC instruction format: rd, rs1, rs2 (rd is accumulator).
967 SDValue M0 = MulNode->getOperand(0);
968 SDValue M1 = MulNode->getOperand(1);
969 MachineSDNode *New =
970 CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Acc, M0, M1);
971
972 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
975 CurDAG->RemoveDeadNode(Node);
976 return true;
977}
978
979static Register getTileReg(uint64_t TileNum) {
980 assert(TileNum <= 15 && "Invalid tile number");
981 return RISCV::T0 + TileNum;
982}
983
  // Custom selection for the SiFive VCIX "side-effect" intrinsics
  // (sf.vc.x.se / sf.vc.i.se): these require the vector extension.
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");

  SDLoc DL(Node);
  // Operand 0 is the chain; operand 1 is the intrinsic ID.
  unsigned IntNo = Node->getConstantOperandVal(1);

  // NOTE(review): the assert text says "vsetvli" but this code handles the
  // sf.vc.x.se / sf.vc.i.se intrinsics — message looks copy-pasted.
  assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
          IntNo == Intrinsic::riscv_sf_vc_i_se) &&
         "Unexpected vsetvli intrinsic");

  // Intrinsic operand layout: imm, imm, imm, simm5/scalar, sew, log2lmul, vl.
  // The pseudos take log2(SEW), so convert the SEW operand (operand 6) here.
  unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
  SDValue SEWOp =
      CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
  // Pseudo operand order: the three immediates, the scalar/simm5 operand,
  // VL (operand 8), log2(SEW), and finally the chain (operand 0).
  SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
                                      Node->getOperand(4), Node->getOperand(5),
                                      Node->getOperand(8), SEWOp,
                                      Node->getOperand(0)};

  // Pick the pseudo matching the encoded log2(LMUL) in operand 7:
  // 5/6/7 encode the fractional LMULs MF8/MF4/MF2; 0-3 encode M1/M2/M4/M8.
  unsigned Opcode;
  auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
  switch (LMulSDNode->getSExtValue()) {
  case 5:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
                                                  : RISCV::PseudoSF_VC_I_SE_MF8;
    break;
  case 6:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
                                                  : RISCV::PseudoSF_VC_I_SE_MF4;
    break;
  case 7:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
                                                  : RISCV::PseudoSF_VC_I_SE_MF2;
    break;
  case 0:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
                                                  : RISCV::PseudoSF_VC_I_SE_M1;
    break;
  case 1:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
                                                  : RISCV::PseudoSF_VC_I_SE_M2;
    break;
  case 2:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
                                                  : RISCV::PseudoSF_VC_I_SE_M4;
    break;
  case 3:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
                                                  : RISCV::PseudoSF_VC_I_SE_M8;
    break;
  }

  // Replace the intrinsic node with the selected pseudo (chain result only).
  ReplaceNode(Node, CurDAG->getMachineNode(
                        Opcode, DL, Node->getSimpleValueType(0), Operands));
}
1042
1043static unsigned getSegInstNF(unsigned Intrinsic) {
1044#define INST_NF_CASE(NAME, NF) \
1045 case Intrinsic::riscv_##NAME##NF: \
1046 return NF;
1047#define INST_NF_CASE_MASK(NAME, NF) \
1048 case Intrinsic::riscv_##NAME##NF##_mask: \
1049 return NF;
1050#define INST_NF_CASE_FF(NAME, NF) \
1051 case Intrinsic::riscv_##NAME##NF##ff: \
1052 return NF;
1053#define INST_NF_CASE_FF_MASK(NAME, NF) \
1054 case Intrinsic::riscv_##NAME##NF##ff_mask: \
1055 return NF;
1056#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
1057 MACRO_NAME(NAME, 2) \
1058 MACRO_NAME(NAME, 3) \
1059 MACRO_NAME(NAME, 4) \
1060 MACRO_NAME(NAME, 5) \
1061 MACRO_NAME(NAME, 6) \
1062 MACRO_NAME(NAME, 7) \
1063 MACRO_NAME(NAME, 8)
1064#define INST_ALL_NF_CASE(NAME) \
1065 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
1066 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
1067#define INST_ALL_NF_CASE_WITH_FF(NAME) \
1068 INST_ALL_NF_CASE(NAME) \
1069 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
1070 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
1071 switch (Intrinsic) {
1072 default:
1073 llvm_unreachable("Unexpected segment load/store intrinsic");
1075 INST_ALL_NF_CASE(vlsseg)
1076 INST_ALL_NF_CASE(vloxseg)
1077 INST_ALL_NF_CASE(vluxseg)
1078 INST_ALL_NF_CASE(vsseg)
1079 INST_ALL_NF_CASE(vssseg)
1080 INST_ALL_NF_CASE(vsoxseg)
1081 INST_ALL_NF_CASE(vsuxseg)
1082 }
1083}
1084
1085static bool isApplicableToPLIOrPLUI(int Val) {
1086 // Check if the immediate is packed i8 or i10
1087 int16_t Bit31To16 = Val >> 16;
1088 int16_t Bit15To0 = Val;
1089 int8_t Bit15To8 = Bit15To0 >> 8;
1090 int8_t Bit7To0 = Val;
1091 if (Bit31To16 != Bit15To0)
1092 return false;
1093
1094 return isInt<10>(Bit15To0) || isShiftedInt<10, 6>(Bit15To0) ||
1095 Bit15To8 == Bit7To0;
1096}
1097
1099 // If we have a custom node, we have already selected.
1100 if (Node->isMachineOpcode()) {
1101 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1102 Node->setNodeId(-1);
1103 return;
1104 }
1105
1106 // Instruction Selection not handled by the auto-generated tablegen selection
1107 // should be handled here.
1108 unsigned Opcode = Node->getOpcode();
1109 MVT XLenVT = Subtarget->getXLenVT();
1110 SDLoc DL(Node);
1111 MVT VT = Node->getSimpleValueType(0);
1112
1113 bool HasBitTest = Subtarget->hasBEXTILike();
1114
1115 switch (Opcode) {
1116 case ISD::Constant: {
1117 assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
1118 auto *ConstNode = cast<ConstantSDNode>(Node);
1119 if (ConstNode->isZero()) {
1120 SDValue New =
1121 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1122 ReplaceNode(Node, New.getNode());
1123 return;
1124 }
1125 int64_t Imm = ConstNode->getSExtValue();
1126 // If only the lower 8 bits are used, try to convert this to a simm6 by
1127 // sign-extending bit 7. This is neutral without the C extension, and
1128 // allows C.LI to be used if C is present.
1129 if (!isInt<8>(Imm) && isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) &&
1131 Imm = SignExtend64<8>(Imm);
1132 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1133 // by sign extending bit 15.
1134 else if (!isInt<16>(Imm) && isUInt<16>(Imm) &&
1136 Imm = SignExtend64<16>(Imm);
1137
1138 // If the upper XLen-16 bits are not used, the lower 2 bytes are the same,
1139 // and we can't use li, convert to an xlen splat so we can use pli.b.
1140 if (Subtarget->hasStdExtP() && !isInt<12>(Imm) &&
1141 (Imm & 0xff) == ((Imm >> 8) & 0xff) && hasAllHUsers(Node)) {
1142 // Splat the lower 16 bits to XLen. Sign extend for RV32.
1143 uint64_t Splat = Imm & 0xffff;
1144 Splat = (Splat << 16) | Splat;
1145 if (VT == MVT::i64)
1146 Imm = Splat << 32 | Splat;
1147 else
1148 Imm = SignExtend64<32>(Splat);
1149 } else {
1150 // If the upper 32-bits are not used try to convert this into a simm32 by
1151 // sign extending bit 32.
1152 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1153 Imm = SignExtend64<32>(Imm);
1154
1155 if (VT == MVT::i64 && !isInt<12>(Imm) && !isShiftedInt<20, 12>(Imm) &&
1156 Subtarget->hasStdExtP() && isApplicableToPLIOrPLUI(Imm) &&
1157 hasAllWUsers(Node)) {
1158 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers,
1159 // we can simply copy lower 32 bits to higher 32 bits to make it able to
1160 // rematerialize to PLI_B or PLI_H
1161 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1162 }
1163 }
1164
1165 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1166 return;
1167 }
1168 case ISD::ConstantFP: {
1169 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1170
1171 bool Is64Bit = Subtarget->is64Bit();
1172 bool HasZdinx = Subtarget->hasStdExtZdinx();
1173
1174 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1175 SDValue Imm;
1176 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1177 // create an integer immediate.
1178 if (APF.isPosZero() || NegZeroF64) {
1179 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1180 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1181 else
1182 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1183 } else {
1184 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1185 *Subtarget);
1186 }
1187
1188 unsigned Opc;
1189 switch (VT.SimpleTy) {
1190 default:
1191 llvm_unreachable("Unexpected size");
1192 case MVT::bf16:
1193 assert(Subtarget->hasStdExtZfbfmin());
1194 Opc = RISCV::FMV_H_X;
1195 break;
1196 case MVT::f16:
1197 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1198 break;
1199 case MVT::f32:
1200 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1201 break;
1202 case MVT::f64:
1203 // For RV32, we can't move from a GPR, we need to convert instead. This
1204 // should only happen for +0.0 and -0.0.
1205 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1206 if (HasZdinx)
1207 Opc = RISCV::COPY;
1208 else
1209 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1210 break;
1211 }
1212
1213 SDNode *Res;
1214 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1215 Res =
1216 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1217 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1218 Res =
1219 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1220 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1221 Res = CurDAG->getMachineNode(
1222 Opc, DL, VT, Imm,
1223 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1224 else
1225 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1226
1227 // For f64 -0.0, we need to insert a fneg.d idiom.
1228 if (NegZeroF64) {
1229 Opc = RISCV::FSGNJN_D;
1230 if (HasZdinx)
1231 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1232 Res =
1233 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1234 }
1235
1236 ReplaceNode(Node, Res);
1237 return;
1238 }
1239 case RISCVISD::BuildGPRPair:
1240 case RISCVISD::BuildPairF64: {
1241 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1242 break;
1243
1244 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1245 "BuildPairF64 only handled here on rv32i_zdinx");
1246
1247 SDValue N =
1248 buildGPRPair(CurDAG, DL, VT, Node->getOperand(0), Node->getOperand(1));
1249 ReplaceNode(Node, N.getNode());
1250 return;
1251 }
1252 case RISCVISD::SplitGPRPair:
1253 case RISCVISD::SplitF64: {
1254 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1255 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1256 "SplitF64 only handled here on rv32i_zdinx");
1257
1258 if (!SDValue(Node, 0).use_empty()) {
1259 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1260 Node->getValueType(0),
1261 Node->getOperand(0));
1262 ReplaceUses(SDValue(Node, 0), Lo);
1263 }
1264
1265 if (!SDValue(Node, 1).use_empty()) {
1266 SDValue Hi = CurDAG->getTargetExtractSubreg(
1267 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1268 ReplaceUses(SDValue(Node, 1), Hi);
1269 }
1270
1271 CurDAG->RemoveDeadNode(Node);
1272 return;
1273 }
1274
1275 assert(Opcode != RISCVISD::SplitGPRPair &&
1276 "SplitGPRPair should already be handled");
1277
1278 if (!Subtarget->hasStdExtZfa())
1279 break;
1280 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1281 "Unexpected subtarget");
1282
1283 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1284 if (!SDValue(Node, 0).use_empty()) {
1285 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1286 Node->getOperand(0));
1287 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1288 }
1289 if (!SDValue(Node, 1).use_empty()) {
1290 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1291 Node->getOperand(0));
1292 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1293 }
1294
1295 CurDAG->RemoveDeadNode(Node);
1296 return;
1297 }
1298 case ISD::SHL: {
1299 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1300 if (!N1C)
1301 break;
1302 SDValue N0 = Node->getOperand(0);
1303 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1305 break;
1306 unsigned ShAmt = N1C->getZExtValue();
1307 uint64_t Mask = N0.getConstantOperandVal(1);
1308
1309 if (isShiftedMask_64(Mask)) {
1310 unsigned XLen = Subtarget->getXLen();
1311 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1312 unsigned TrailingZeros = llvm::countr_zero(Mask);
1313 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1314 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1315 // where C2 has 32 leading zeros and C3 trailing zeros.
1316 SDNode *SRLIW = CurDAG->getMachineNode(
1317 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1318 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1319 SDNode *SLLI = CurDAG->getMachineNode(
1320 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1321 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1322 ReplaceNode(Node, SLLI);
1323 return;
1324 }
1325 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1326 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1327 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1328 // where C2 has C4 leading zeros and no trailing zeros.
1329 // This is profitable if the "and" was to be lowered to
1330 // (srli (slli X, C4), C4) and not (andi X, C2).
1331 // For "LeadingZeros == 32":
1332 // - with Zba it's just (slli.uw X, C)
1333 // - without Zba a tablegen pattern applies the very same
1334 // transform as we would have done here
1335 SDNode *SLLI = CurDAG->getMachineNode(
1336 RISCV::SLLI, DL, VT, N0.getOperand(0),
1337 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1338 SDNode *SRLI = CurDAG->getMachineNode(
1339 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1340 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1341 ReplaceNode(Node, SRLI);
1342 return;
1343 }
1344 }
1345 break;
1346 }
1347 case ISD::SRL: {
1348 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1349 if (!N1C)
1350 break;
1351 SDValue N0 = Node->getOperand(0);
1352 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1353 break;
1354 unsigned ShAmt = N1C->getZExtValue();
1355 uint64_t Mask = N0.getConstantOperandVal(1);
1356
1357 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1358 // 32 leading zeros and C3 trailing zeros.
1359 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1360 unsigned XLen = Subtarget->getXLen();
1361 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1362 unsigned TrailingZeros = llvm::countr_zero(Mask);
1363 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1364 SDNode *SRLIW = CurDAG->getMachineNode(
1365 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1366 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1367 SDNode *SLLI = CurDAG->getMachineNode(
1368 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1369 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1370 ReplaceNode(Node, SLLI);
1371 return;
1372 }
1373 }
1374
1375 // Optimize (srl (and X, C2), C) ->
1376 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1377 // Where C2 is a mask with C3 trailing ones.
1378 // Taking into account that the C2 may have had lower bits unset by
1379 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1380 // This pattern occurs when type legalizing right shifts for types with
1381 // less than XLen bits.
1382 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1383 if (!isMask_64(Mask))
1384 break;
1385 unsigned TrailingOnes = llvm::countr_one(Mask);
1386 if (ShAmt >= TrailingOnes)
1387 break;
1388 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1389 if (TrailingOnes == 32) {
1390 SDNode *SRLI = CurDAG->getMachineNode(
1391 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1392 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1393 ReplaceNode(Node, SRLI);
1394 return;
1395 }
1396
1397 // Only do the remaining transforms if the AND has one use.
1398 if (!N0.hasOneUse())
1399 break;
1400
1401 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1402 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1403 SDNode *BEXTI = CurDAG->getMachineNode(
1404 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1405 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1406 ReplaceNode(Node, BEXTI);
1407 return;
1408 }
1409
1410 const unsigned Msb = TrailingOnes - 1;
1411 const unsigned Lsb = ShAmt;
1412 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
1413 return;
1414
1415 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1416 SDNode *SLLI =
1417 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1418 CurDAG->getTargetConstant(LShAmt, DL, VT));
1419 SDNode *SRLI = CurDAG->getMachineNode(
1420 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1421 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1422 ReplaceNode(Node, SRLI);
1423 return;
1424 }
1425 case ISD::SRA: {
1427 return;
1428
1430 return;
1431
1432 // Optimize (sra (sext_inreg X, i16), C) ->
1433 // (srai (slli X, (XLen-16), (XLen-16) + C)
1434 // And (sra (sext_inreg X, i8), C) ->
1435 // (srai (slli X, (XLen-8), (XLen-8) + C)
1436 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1437 // This transform matches the code we get without Zbb. The shifts are more
1438 // compressible, and this can help expose CSE opportunities in the sdiv by
1439 // constant optimization.
1440 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1441 if (!N1C)
1442 break;
1443 SDValue N0 = Node->getOperand(0);
1444 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1445 break;
1446 unsigned ShAmt = N1C->getZExtValue();
1447 unsigned ExtSize =
1448 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1449 // ExtSize of 32 should use sraiw via tablegen pattern.
1450 if (ExtSize >= 32 || ShAmt >= ExtSize)
1451 break;
1452 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1453 SDNode *SLLI =
1454 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1455 CurDAG->getTargetConstant(LShAmt, DL, VT));
1456 SDNode *SRAI = CurDAG->getMachineNode(
1457 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1458 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1459 ReplaceNode(Node, SRAI);
1460 return;
1461 }
1463 // Optimize (sext_inreg (srl X, C), i8/i16) ->
1464 // (srai (slli X, XLen-ExtSize-C), XLen-ExtSize)
1465 // This is a bitfield extract pattern where we're extracting a signed
1466 // 8-bit or 16-bit field from position C.
1467 SDValue N0 = Node->getOperand(0);
1468 if (N0.getOpcode() != ISD::SRL || !N0.hasOneUse())
1469 break;
1470
1471 auto *ShAmtC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1472 if (!ShAmtC)
1473 break;
1474
1475 unsigned ExtSize =
1476 cast<VTSDNode>(Node->getOperand(1))->getVT().getSizeInBits();
1477 unsigned ShAmt = ShAmtC->getZExtValue();
1478 unsigned XLen = Subtarget->getXLen();
1479
1480 // Only handle types less than 32, and make sure the shift amount is valid.
1481 if (ExtSize >= 32 || ShAmt >= XLen - ExtSize)
1482 break;
1483
1484 unsigned LShAmt = XLen - ExtSize - ShAmt;
1485 SDNode *SLLI =
1486 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1487 CurDAG->getTargetConstant(LShAmt, DL, VT));
1488 SDNode *SRAI = CurDAG->getMachineNode(
1489 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1490 CurDAG->getTargetConstant(XLen - ExtSize, DL, VT));
1491 ReplaceNode(Node, SRAI);
1492 return;
1493 }
1494 case ISD::OR: {
1496 return;
1497
1498 break;
1499 }
1500 case ISD::XOR:
1502 return;
1503
1504 break;
1505 case ISD::AND: {
1506 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1507 if (!N1C)
1508 break;
1509
1510 SDValue N0 = Node->getOperand(0);
1511
1512 bool LeftShift = N0.getOpcode() == ISD::SHL;
1513 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1514 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1515 if (!C)
1516 break;
1517 unsigned C2 = C->getZExtValue();
1518 unsigned XLen = Subtarget->getXLen();
1519 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1520
1521 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1522 // shift pair might offer more compression opportunities.
1523 // TODO: We could check for C extension here, but we don't have many lit
1524 // tests with the C extension enabled so not checking gets better
1525 // coverage.
1526 // TODO: What if ANDI faster than shift?
1527 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1528
1529 uint64_t C1 = N1C->getZExtValue();
1530
1531 // Clear irrelevant bits in the mask.
1532 if (LeftShift)
1534 else
1535 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1536
1537 // Some transforms should only be done if the shift has a single use or
1538 // the AND would become (srli (slli X, 32), 32)
1539 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1540
1541 SDValue X = N0.getOperand(0);
1542
1543 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1544 // with c3 leading zeros.
1545 if (!LeftShift && isMask_64(C1)) {
1546 unsigned Leading = XLen - llvm::bit_width(C1);
1547 if (C2 < Leading) {
1548 // If the number of leading zeros is C2+32 this can be SRLIW.
1549 if (C2 + 32 == Leading) {
1550 SDNode *SRLIW = CurDAG->getMachineNode(
1551 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1552 ReplaceNode(Node, SRLIW);
1553 return;
1554 }
1555
1556 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1557 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1558 //
1559 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1560 // legalized and goes through DAG combine.
1561 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1562 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1563 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1564 SDNode *SRAIW =
1565 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1566 CurDAG->getTargetConstant(31, DL, VT));
1567 SDNode *SRLIW = CurDAG->getMachineNode(
1568 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1569 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1570 ReplaceNode(Node, SRLIW);
1571 return;
1572 }
1573
1574 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1575 // available.
1576 // Transform (and (srl x, C2), C1)
1577 // -> (<bfextract> x, msb, lsb)
1578 //
1579 // Make sure to keep this below the SRLIW cases, as we always want to
1580 // prefer the more common instruction.
1581 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1582 const unsigned Lsb = C2;
1583 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1584 return;
1585
1586 // (srli (slli x, c3-c2), c3).
1587 // Skip if we could use (zext.w (sraiw X, C2)).
1588 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1589 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1590 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1591 // Also Skip if we can use bexti or th.tst.
1592 Skip |= HasBitTest && Leading == XLen - 1;
1593 if (OneUseOrZExtW && !Skip) {
1594 SDNode *SLLI = CurDAG->getMachineNode(
1595 RISCV::SLLI, DL, VT, X,
1596 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1597 SDNode *SRLI = CurDAG->getMachineNode(
1598 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1599 CurDAG->getTargetConstant(Leading, DL, VT));
1600 ReplaceNode(Node, SRLI);
1601 return;
1602 }
1603 }
1604 }
1605
1606 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1607 // shifted by c2 bits with c3 leading zeros.
1608 if (LeftShift && isShiftedMask_64(C1)) {
1609 unsigned Leading = XLen - llvm::bit_width(C1);
1610
1611 if (C2 + Leading < XLen &&
1612 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1613 // Use slli.uw when possible.
1614 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1615 SDNode *SLLI_UW =
1616 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1617 CurDAG->getTargetConstant(C2, DL, VT));
1618 ReplaceNode(Node, SLLI_UW);
1619 return;
1620 }
1621
1622 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1623 // available.
1624 // Transform (and (shl x, c2), c1)
1625 // -> (<bfinsert> x, msb, lsb)
1626 // e.g.
1627 // (and (shl x, 12), 0x00fff000)
1628 // If XLen = 32 and C2 = 12, then
1629 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1630 const unsigned Msb = XLen - Leading - 1;
1631 const unsigned Lsb = C2;
1632 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1633 return;
1634
1635 if (OneUseOrZExtW && !IsCANDI) {
1636 // (packh x0, X)
1637 if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
1638 SDNode *PACKH = CurDAG->getMachineNode(
1639 RISCV::PACKH, DL, VT,
1640 CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()), X);
1641 ReplaceNode(Node, PACKH);
1642 return;
1643 }
1644 // (srli (slli c2+c3), c3)
1645 SDNode *SLLI = CurDAG->getMachineNode(
1646 RISCV::SLLI, DL, VT, X,
1647 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1648 SDNode *SRLI = CurDAG->getMachineNode(
1649 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1650 CurDAG->getTargetConstant(Leading, DL, VT));
1651 ReplaceNode(Node, SRLI);
1652 return;
1653 }
1654 }
1655 }
1656
1657 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1658 // shifted mask with c2 leading zeros and c3 trailing zeros.
1659 if (!LeftShift && isShiftedMask_64(C1)) {
1660 unsigned Leading = XLen - llvm::bit_width(C1);
1661 unsigned Trailing = llvm::countr_zero(C1);
1662 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1663 !IsCANDI) {
1664 unsigned SrliOpc = RISCV::SRLI;
1665 // If the input is zexti32 we should use SRLIW.
1666 if (X.getOpcode() == ISD::AND &&
1667 isa<ConstantSDNode>(X.getOperand(1)) &&
1668 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1669 SrliOpc = RISCV::SRLIW;
1670 X = X.getOperand(0);
1671 }
1672 SDNode *SRLI = CurDAG->getMachineNode(
1673 SrliOpc, DL, VT, X,
1674 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1675 SDNode *SLLI = CurDAG->getMachineNode(
1676 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1677 CurDAG->getTargetConstant(Trailing, DL, VT));
1678 ReplaceNode(Node, SLLI);
1679 return;
1680 }
1681 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1682 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1683 OneUseOrZExtW && !IsCANDI) {
1684 SDNode *SRLIW = CurDAG->getMachineNode(
1685 RISCV::SRLIW, DL, VT, X,
1686 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1687 SDNode *SLLI = CurDAG->getMachineNode(
1688 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1689 CurDAG->getTargetConstant(Trailing, DL, VT));
1690 ReplaceNode(Node, SLLI);
1691 return;
1692 }
1693 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1694 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1695 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1696 SDNode *SRLI = CurDAG->getMachineNode(
1697 RISCV::SRLI, DL, VT, X,
1698 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1699 SDNode *SLLI_UW = CurDAG->getMachineNode(
1700 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1701 CurDAG->getTargetConstant(Trailing, DL, VT));
1702 ReplaceNode(Node, SLLI_UW);
1703 return;
1704 }
1705 }
1706
1707 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1708 // shifted mask with no leading zeros and c3 trailing zeros.
1709 if (LeftShift && isShiftedMask_64(C1)) {
1710 unsigned Leading = XLen - llvm::bit_width(C1);
1711 unsigned Trailing = llvm::countr_zero(C1);
1712 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1713 SDNode *SRLI = CurDAG->getMachineNode(
1714 RISCV::SRLI, DL, VT, X,
1715 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1716 SDNode *SLLI = CurDAG->getMachineNode(
1717 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1718 CurDAG->getTargetConstant(Trailing, DL, VT));
1719 ReplaceNode(Node, SLLI);
1720 return;
1721 }
1722 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1723 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1724 SDNode *SRLIW = CurDAG->getMachineNode(
1725 RISCV::SRLIW, DL, VT, X,
1726 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1727 SDNode *SLLI = CurDAG->getMachineNode(
1728 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1729 CurDAG->getTargetConstant(Trailing, DL, VT));
1730 ReplaceNode(Node, SLLI);
1731 return;
1732 }
1733
1734 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1735 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1736 Subtarget->hasStdExtZba()) {
1737 SDNode *SRLI = CurDAG->getMachineNode(
1738 RISCV::SRLI, DL, VT, X,
1739 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1740 SDNode *SLLI_UW = CurDAG->getMachineNode(
1741 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1742 CurDAG->getTargetConstant(Trailing, DL, VT));
1743 ReplaceNode(Node, SLLI_UW);
1744 return;
1745 }
1746 }
1747 }
1748
1749 const uint64_t C1 = N1C->getZExtValue();
1750
1751 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1752 N0.hasOneUse()) {
1753 unsigned C2 = N0.getConstantOperandVal(1);
1754 unsigned XLen = Subtarget->getXLen();
1755 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1756
1757 SDValue X = N0.getOperand(0);
1758
1759 // Prefer SRAIW + ANDI when possible.
1760 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1761 X.getOpcode() == ISD::SHL &&
1762 isa<ConstantSDNode>(X.getOperand(1)) &&
1763 X.getConstantOperandVal(1) == 32;
1764 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1765 // mask with c3 leading zeros and c2 is larger than c3.
1766 if (isMask_64(C1) && !Skip) {
1767 unsigned Leading = XLen - llvm::bit_width(C1);
1768 if (C2 > Leading) {
1769 SDNode *SRAI = CurDAG->getMachineNode(
1770 RISCV::SRAI, DL, VT, X,
1771 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1772 SDNode *SRLI = CurDAG->getMachineNode(
1773 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1774 CurDAG->getTargetConstant(Leading, DL, VT));
1775 ReplaceNode(Node, SRLI);
1776 return;
1777 }
1778 }
1779
1780 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1781 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1782 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1783 if (isShiftedMask_64(C1) && !Skip) {
1784 unsigned Leading = XLen - llvm::bit_width(C1);
1785 unsigned Trailing = llvm::countr_zero(C1);
1786 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1787 SDNode *SRAI = CurDAG->getMachineNode(
1788 RISCV::SRAI, DL, VT, N0.getOperand(0),
1789 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1790 SDNode *SRLI = CurDAG->getMachineNode(
1791 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1792 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1793 SDNode *SLLI = CurDAG->getMachineNode(
1794 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1795 CurDAG->getTargetConstant(Trailing, DL, VT));
1796 ReplaceNode(Node, SLLI);
1797 return;
1798 }
1799 }
1800 }
1801
1802 // If C1 masks off the upper bits only (but can't be formed as an
1803 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1804 // available.
1805 // Transform (and x, C1)
1806 // -> (<bfextract> x, msb, lsb)
1807 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1808 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1809 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1810 const unsigned Msb = llvm::bit_width(C1) - 1;
1811 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1812 return;
1813 }
1814
1816 return;
1817
1818 break;
1819 }
1820 case ISD::MUL: {
1821 // Special case for calculating (mul (and X, C2), C1) where the full product
1822 // fits in XLen bits. We can shift X left by the number of leading zeros in
1823 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1824 // product has XLen trailing zeros, putting it in the output of MULHU. This
1825 // can avoid materializing a constant in a register for C2.
1826
1827 // RHS should be a constant.
1828 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1829 if (!N1C || !N1C->hasOneUse())
1830 break;
1831
1832 // LHS should be an AND with constant.
1833 SDValue N0 = Node->getOperand(0);
1834 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1835 break;
1836
1838
1839 // Constant should be a mask.
1840 if (!isMask_64(C2))
1841 break;
1842
1843 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1844 // multiple users or the constant is a simm12. This prevents inserting a
1845 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1846 // make it more costly to materialize. Otherwise, using a SLLI might allow
1847 // it to be compressed.
1848 bool IsANDIOrZExt =
1849 isInt<12>(C2) ||
1850 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1851 // With XTHeadBb, we can use TH.EXTU.
1852 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1853 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1854 break;
1855 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1856 // the constant is a simm32.
1857 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1858 // With XTHeadBb, we can use TH.EXTU.
1859 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1860 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1861 break;
1862
1863 // We need to shift left the AND input and C1 by a total of XLen bits.
1864
1865 // How far left do we need to shift the AND input?
1866 unsigned XLen = Subtarget->getXLen();
1867 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1868
1869 // The constant gets shifted by the remaining amount unless that would
1870 // shift bits out.
1871 uint64_t C1 = N1C->getZExtValue();
1872 unsigned ConstantShift = XLen - LeadingZeros;
1873 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1874 break;
1875
1876 uint64_t ShiftedC1 = C1 << ConstantShift;
1877 // If this is RV32, we need to sign extend the constant.
1878 if (XLen == 32)
1879 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1880
1881 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1882 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1883 SDNode *SLLI =
1884 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1885 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1886 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1887 SDValue(SLLI, 0), SDValue(Imm, 0));
1888 ReplaceNode(Node, MULHU);
1889 return;
1890 }
1891 case ISD::SMUL_LOHI:
1892 case ISD::UMUL_LOHI:
1893 case RISCVISD::WMULSU:
1894 case RISCVISD::WADDU:
1895 case RISCVISD::WSUBU: {
1896 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1897 "Unexpected opcode");
1898
1899 unsigned Opc;
1900 switch (Node->getOpcode()) {
1901 default:
1902 llvm_unreachable("Unexpected opcode");
1903 case ISD::SMUL_LOHI:
1904 Opc = RISCV::WMUL;
1905 break;
1906 case ISD::UMUL_LOHI:
1907 Opc = RISCV::WMULU;
1908 break;
1909 case RISCVISD::WMULSU:
1910 Opc = RISCV::WMULSU;
1911 break;
1912 case RISCVISD::WADDU:
1913 Opc = RISCV::WADDU;
1914 break;
1915 case RISCVISD::WSUBU:
1916 Opc = RISCV::WSUBU;
1917 break;
1918 }
1919
1920 SDNode *Result = CurDAG->getMachineNode(
1921 Opc, DL, MVT::Untyped, Node->getOperand(0), Node->getOperand(1));
1922
1923 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(Result, 0));
1924 ReplaceUses(SDValue(Node, 0), Lo);
1925 ReplaceUses(SDValue(Node, 1), Hi);
1926 CurDAG->RemoveDeadNode(Node);
1927 return;
1928 }
1929 case RISCVISD::WSLL:
1930 case RISCVISD::WSLA: {
1931 // Custom select WSLL/WSLA for RV32P.
1932 assert(Subtarget->hasStdExtP() && !Subtarget->is64Bit() && VT == MVT::i32 &&
1933 "Unexpected opcode");
1934
1935 bool IsSigned = Node->getOpcode() == RISCVISD::WSLA;
1936
1937 SDValue ShAmt = Node->getOperand(1);
1938
1939 unsigned Opc;
1940
1941 auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
1942 if (ShAmtC && ShAmtC->getZExtValue() < 64) {
1943 Opc = IsSigned ? RISCV::WSLAI : RISCV::WSLLI;
1944 ShAmt = CurDAG->getTargetConstant(ShAmtC->getZExtValue(), DL, XLenVT);
1945 } else {
1946 Opc = IsSigned ? RISCV::WSLA : RISCV::WSLL;
1947 }
1948
1949 SDNode *WShift = CurDAG->getMachineNode(Opc, DL, MVT::Untyped,
1950 Node->getOperand(0), ShAmt);
1951
1952 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(WShift, 0));
1953 ReplaceUses(SDValue(Node, 0), Lo);
1954 ReplaceUses(SDValue(Node, 1), Hi);
1955 CurDAG->RemoveDeadNode(Node);
1956 return;
1957 }
1958 case ISD::LOAD: {
1959 if (tryIndexedLoad(Node))
1960 return;
1961
1962 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1963 // We match post-incrementing load here
1965 if (Load->getAddressingMode() != ISD::POST_INC)
1966 break;
1967
1968 SDValue Chain = Node->getOperand(0);
1969 SDValue Base = Node->getOperand(1);
1970 SDValue Offset = Node->getOperand(2);
1971
1972 bool Simm12 = false;
1973 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1974
1975 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1976 int ConstantVal = ConstantOffset->getSExtValue();
1977 Simm12 = isInt<12>(ConstantVal);
1978 if (Simm12)
1979 Offset = CurDAG->getSignedTargetConstant(ConstantVal, SDLoc(Offset),
1980 Offset.getValueType());
1981 }
1982
1983 unsigned Opcode = 0;
1984 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1985 case MVT::i8:
1986 if (Simm12 && SignExtend)
1987 Opcode = RISCV::CV_LB_ri_inc;
1988 else if (Simm12 && !SignExtend)
1989 Opcode = RISCV::CV_LBU_ri_inc;
1990 else if (!Simm12 && SignExtend)
1991 Opcode = RISCV::CV_LB_rr_inc;
1992 else
1993 Opcode = RISCV::CV_LBU_rr_inc;
1994 break;
1995 case MVT::i16:
1996 if (Simm12 && SignExtend)
1997 Opcode = RISCV::CV_LH_ri_inc;
1998 else if (Simm12 && !SignExtend)
1999 Opcode = RISCV::CV_LHU_ri_inc;
2000 else if (!Simm12 && SignExtend)
2001 Opcode = RISCV::CV_LH_rr_inc;
2002 else
2003 Opcode = RISCV::CV_LHU_rr_inc;
2004 break;
2005 case MVT::i32:
2006 if (Simm12)
2007 Opcode = RISCV::CV_LW_ri_inc;
2008 else
2009 Opcode = RISCV::CV_LW_rr_inc;
2010 break;
2011 default:
2012 break;
2013 }
2014 if (!Opcode)
2015 break;
2016
2017 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
2018 Chain.getSimpleValueType(), Base,
2019 Offset, Chain));
2020 return;
2021 }
2022 break;
2023 }
2024 case RISCVISD::LD_RV32: {
2025 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
2026
2028 SDValue Chain = Node->getOperand(0);
2029 SDValue Addr = Node->getOperand(1);
2031
2032 SDValue Ops[] = {Base, Offset, Chain};
2033 MachineSDNode *New = CurDAG->getMachineNode(
2034 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
2035 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
2036 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
2037 ReplaceUses(SDValue(Node, 0), Lo);
2038 ReplaceUses(SDValue(Node, 1), Hi);
2039 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
2040 CurDAG->RemoveDeadNode(Node);
2041 return;
2042 }
2043 case RISCVISD::SD_RV32: {
2045 SDValue Chain = Node->getOperand(0);
2046 SDValue Addr = Node->getOperand(3);
2048
2049 SDValue Lo = Node->getOperand(1);
2050 SDValue Hi = Node->getOperand(2);
2051
2052 SDValue RegPair;
2053 // Peephole to use X0_Pair for storing zero.
2055 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
2056 } else {
2057 RegPair = buildGPRPair(CurDAG, DL, MVT::Untyped, Lo, Hi);
2058 }
2059
2060 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
2061 {RegPair, Base, Offset, Chain});
2062 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
2063 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
2064 CurDAG->RemoveDeadNode(Node);
2065 return;
2066 }
2067 case RISCVISD::ADDD:
2068 // Try to match WMACC pattern: ADDD where one operand pair comes from a
2069 // widening multiply.
2071 return;
2072
2073 // Fall through to regular ADDD selection.
2074 [[fallthrough]];
2075 case RISCVISD::SUBD:
2076 case RISCVISD::PPAIRE_DB:
2077 case RISCVISD::WADDAU:
2078 case RISCVISD::WSUBAU: {
2079 assert(!Subtarget->is64Bit() && "Unexpected opcode");
2080 assert(
2081 (Node->getOpcode() != RISCVISD::PPAIRE_DB || Subtarget->hasStdExtP()) &&
2082 "Unexpected opcode");
2083
2084 SDValue Op0Lo = Node->getOperand(0);
2085 SDValue Op0Hi = Node->getOperand(1);
2086
2087 SDValue Op0;
2088 if (isNullConstant(Op0Lo) && isNullConstant(Op0Hi)) {
2089 Op0 = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
2090 } else {
2091 Op0 = buildGPRPair(CurDAG, DL, MVT::Untyped, Op0Lo, Op0Hi);
2092 }
2093
2094 SDValue Op1Lo = Node->getOperand(2);
2095 SDValue Op1Hi = Node->getOperand(3);
2096
2097 MachineSDNode *New;
2098 if (Opcode == RISCVISD::WADDAU || Opcode == RISCVISD::WSUBAU) {
2099 // WADDAU/WSUBAU: Op0 is the accumulator (GPRPair), Op1Lo and Op1Hi are
2100 // the two 32-bit values.
2101 unsigned Opc = Opcode == RISCVISD::WADDAU ? RISCV::WADDAU : RISCV::WSUBAU;
2102 New = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Op0, Op1Lo, Op1Hi);
2103 } else {
2104 SDValue Op1 = buildGPRPair(CurDAG, DL, MVT::Untyped, Op1Lo, Op1Hi);
2105
2106 unsigned Opc;
2107 switch (Opcode) {
2108 default:
2109 llvm_unreachable("Unexpected opcode");
2110 case RISCVISD::ADDD:
2111 Opc = RISCV::ADDD;
2112 break;
2113 case RISCVISD::SUBD:
2114 Opc = RISCV::SUBD;
2115 break;
2116 case RISCVISD::PPAIRE_DB:
2117 Opc = RISCV::PPAIRE_DB;
2118 break;
2119 }
2120 New = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Op0, Op1);
2121 }
2122
2123 auto [Lo, Hi] = extractGPRPair(CurDAG, DL, SDValue(New, 0));
2124 ReplaceUses(SDValue(Node, 0), Lo);
2125 ReplaceUses(SDValue(Node, 1), Hi);
2126 CurDAG->RemoveDeadNode(Node);
2127 return;
2128 }
2130 unsigned IntNo = Node->getConstantOperandVal(0);
2131 switch (IntNo) {
2132 // By default we do not custom select any intrinsic.
2133 default:
2134 break;
2135 case Intrinsic::riscv_vmsgeu:
2136 case Intrinsic::riscv_vmsge: {
2137 SDValue Src1 = Node->getOperand(1);
2138 SDValue Src2 = Node->getOperand(2);
2139 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
2140 bool IsCmpConstant = false;
2141 bool IsCmpMinimum = false;
2142 // Only custom select scalar second operand.
2143 if (Src2.getValueType() != XLenVT)
2144 break;
2145 // Small constants are handled with patterns.
2146 int64_t CVal = 0;
2147 MVT Src1VT = Src1.getSimpleValueType();
2148 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2149 IsCmpConstant = true;
2150 CVal = C->getSExtValue();
2151 if (CVal >= -15 && CVal <= 16) {
2152 if (!IsUnsigned || CVal != 0)
2153 break;
2154 IsCmpMinimum = true;
2155 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2156 Src1VT.getScalarSizeInBits())
2157 .getSExtValue()) {
2158 IsCmpMinimum = true;
2159 }
2160 }
2161 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
2162 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2163 default:
2164 llvm_unreachable("Unexpected LMUL!");
2165#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2166 case RISCVVType::lmulenum: \
2167 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2168 : RISCV::PseudoVMSLT_VX_##suffix; \
2169 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
2170 : RISCV::PseudoVMSGT_VX_##suffix; \
2171 break;
2172 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2173 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2174 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2175 CASE_VMSLT_OPCODES(LMUL_1, M1)
2176 CASE_VMSLT_OPCODES(LMUL_2, M2)
2177 CASE_VMSLT_OPCODES(LMUL_4, M4)
2178 CASE_VMSLT_OPCODES(LMUL_8, M8)
2179#undef CASE_VMSLT_OPCODES
2180 }
2181 // Mask operations use the LMUL from the mask type.
2182 switch (RISCVTargetLowering::getLMUL(VT)) {
2183 default:
2184 llvm_unreachable("Unexpected LMUL!");
2185#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
2186 case RISCVVType::lmulenum: \
2187 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
2188 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
2189 break;
2190 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
2191 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
2192 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
2193 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
2194 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
2195 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
2196 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
2197#undef CASE_VMNAND_VMSET_OPCODES
2198 }
2199 SDValue SEW = CurDAG->getTargetConstant(
2200 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2201 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2202 SDValue VL;
2203 selectVLOp(Node->getOperand(3), VL);
2204
2205 // If vmsge(u) with minimum value, expand it to vmset.
2206 if (IsCmpMinimum) {
2208 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
2209 return;
2210 }
2211
2212 if (IsCmpConstant) {
2213 SDValue Imm =
2214 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2215
2216 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
2217 {Src1, Imm, VL, SEW}));
2218 return;
2219 }
2220
2221 // Expand to
2222 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
2223 SDValue Cmp = SDValue(
2224 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2225 0);
2226 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
2227 {Cmp, Cmp, VL, MaskSEW}));
2228 return;
2229 }
2230 case Intrinsic::riscv_vmsgeu_mask:
2231 case Intrinsic::riscv_vmsge_mask: {
2232 SDValue Src1 = Node->getOperand(2);
2233 SDValue Src2 = Node->getOperand(3);
2234 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2235 bool IsCmpConstant = false;
2236 bool IsCmpMinimum = false;
2237 // Only custom select scalar second operand.
2238 if (Src2.getValueType() != XLenVT)
2239 break;
2240 // Small constants are handled with patterns.
2241 MVT Src1VT = Src1.getSimpleValueType();
2242 int64_t CVal = 0;
2243 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2244 IsCmpConstant = true;
2245 CVal = C->getSExtValue();
2246 if (CVal >= -15 && CVal <= 16) {
2247 if (!IsUnsigned || CVal != 0)
2248 break;
2249 IsCmpMinimum = true;
2250 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2251 Src1VT.getScalarSizeInBits())
2252 .getSExtValue()) {
2253 IsCmpMinimum = true;
2254 }
2255 }
2256 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2257 VMOROpcode, VMSGTMaskOpcode;
2258 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2259 default:
2260 llvm_unreachable("Unexpected LMUL!");
2261#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2262 case RISCVVType::lmulenum: \
2263 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2264 : RISCV::PseudoVMSLT_VX_##suffix; \
2265 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2266 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2267 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2268 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2269 break;
2270 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2271 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2272 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2273 CASE_VMSLT_OPCODES(LMUL_1, M1)
2274 CASE_VMSLT_OPCODES(LMUL_2, M2)
2275 CASE_VMSLT_OPCODES(LMUL_4, M4)
2276 CASE_VMSLT_OPCODES(LMUL_8, M8)
2277#undef CASE_VMSLT_OPCODES
2278 }
2279 // Mask operations use the LMUL from the mask type.
2280 switch (RISCVTargetLowering::getLMUL(VT)) {
2281 default:
2282 llvm_unreachable("Unexpected LMUL!");
2283#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2284 case RISCVVType::lmulenum: \
2285 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2286 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2287 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2288 break;
2289 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2290 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2291 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2296#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2297 }
2298 SDValue SEW = CurDAG->getTargetConstant(
2299 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2300 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2301 SDValue VL;
2302 selectVLOp(Node->getOperand(5), VL);
2303 SDValue MaskedOff = Node->getOperand(1);
2304 SDValue Mask = Node->getOperand(4);
2305
2306 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2307 if (IsCmpMinimum) {
2308 // We don't need vmor if the MaskedOff and the Mask are the same
2309 // value.
2310 if (Mask == MaskedOff) {
2311 ReplaceUses(Node, Mask.getNode());
2312 return;
2313 }
2315 CurDAG->getMachineNode(VMOROpcode, DL, VT,
2316 {Mask, MaskedOff, VL, MaskSEW}));
2317 return;
2318 }
2319
2320 // If the MaskedOff value and the Mask are the same value use
2321 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2322 // This avoids needing to copy v0 to vd before starting the next sequence.
2323 if (Mask == MaskedOff) {
2324 SDValue Cmp = SDValue(
2325 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2326 0);
2327 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2328 {Mask, Cmp, VL, MaskSEW}));
2329 return;
2330 }
2331
2332 SDValue PolicyOp =
2333 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2334
2335 if (IsCmpConstant) {
2336 SDValue Imm =
2337 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2338
2339 ReplaceNode(Node, CurDAG->getMachineNode(
2340 VMSGTMaskOpcode, DL, VT,
2341 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2342 return;
2343 }
2344
2345 // Otherwise use
2346 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2347 // The result is mask undisturbed.
2348 // We use the same instructions to emulate mask agnostic behavior, because
2349 // the agnostic result can be either undisturbed or all 1.
2350 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2351 {MaskedOff, Src1, Src2, Mask,
2352 VL, SEW, PolicyOp}),
2353 0);
2354 // vmxor.mm vd, vd, v0 is used to update active value.
2355 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2356 {Cmp, Mask, VL, MaskSEW}));
2357 return;
2358 }
2359 case Intrinsic::riscv_vsetvli:
2360 case Intrinsic::riscv_vsetvlimax:
2361 return selectVSETVLI(Node);
2362 case Intrinsic::riscv_sf_vsettnt:
2363 case Intrinsic::riscv_sf_vsettm:
2364 case Intrinsic::riscv_sf_vsettk:
2365 return selectXSfmmVSET(Node);
2366 }
2367 break;
2368 }
2370 unsigned IntNo = Node->getConstantOperandVal(1);
2371 switch (IntNo) {
2372 // By default we do not custom select any intrinsic.
2373 default:
2374 break;
2375 case Intrinsic::riscv_vlseg2:
2376 case Intrinsic::riscv_vlseg3:
2377 case Intrinsic::riscv_vlseg4:
2378 case Intrinsic::riscv_vlseg5:
2379 case Intrinsic::riscv_vlseg6:
2380 case Intrinsic::riscv_vlseg7:
2381 case Intrinsic::riscv_vlseg8: {
2382 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2383 /*IsStrided*/ false);
2384 return;
2385 }
2386 case Intrinsic::riscv_vlseg2_mask:
2387 case Intrinsic::riscv_vlseg3_mask:
2388 case Intrinsic::riscv_vlseg4_mask:
2389 case Intrinsic::riscv_vlseg5_mask:
2390 case Intrinsic::riscv_vlseg6_mask:
2391 case Intrinsic::riscv_vlseg7_mask:
2392 case Intrinsic::riscv_vlseg8_mask: {
2393 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2394 /*IsStrided*/ false);
2395 return;
2396 }
2397 case Intrinsic::riscv_vlsseg2:
2398 case Intrinsic::riscv_vlsseg3:
2399 case Intrinsic::riscv_vlsseg4:
2400 case Intrinsic::riscv_vlsseg5:
2401 case Intrinsic::riscv_vlsseg6:
2402 case Intrinsic::riscv_vlsseg7:
2403 case Intrinsic::riscv_vlsseg8: {
2404 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2405 /*IsStrided*/ true);
2406 return;
2407 }
2408 case Intrinsic::riscv_vlsseg2_mask:
2409 case Intrinsic::riscv_vlsseg3_mask:
2410 case Intrinsic::riscv_vlsseg4_mask:
2411 case Intrinsic::riscv_vlsseg5_mask:
2412 case Intrinsic::riscv_vlsseg6_mask:
2413 case Intrinsic::riscv_vlsseg7_mask:
2414 case Intrinsic::riscv_vlsseg8_mask: {
2415 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2416 /*IsStrided*/ true);
2417 return;
2418 }
2419 case Intrinsic::riscv_vloxseg2:
2420 case Intrinsic::riscv_vloxseg3:
2421 case Intrinsic::riscv_vloxseg4:
2422 case Intrinsic::riscv_vloxseg5:
2423 case Intrinsic::riscv_vloxseg6:
2424 case Intrinsic::riscv_vloxseg7:
2425 case Intrinsic::riscv_vloxseg8:
2426 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2427 /*IsOrdered*/ true);
2428 return;
2429 case Intrinsic::riscv_vluxseg2:
2430 case Intrinsic::riscv_vluxseg3:
2431 case Intrinsic::riscv_vluxseg4:
2432 case Intrinsic::riscv_vluxseg5:
2433 case Intrinsic::riscv_vluxseg6:
2434 case Intrinsic::riscv_vluxseg7:
2435 case Intrinsic::riscv_vluxseg8:
2436 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2437 /*IsOrdered*/ false);
2438 return;
2439 case Intrinsic::riscv_vloxseg2_mask:
2440 case Intrinsic::riscv_vloxseg3_mask:
2441 case Intrinsic::riscv_vloxseg4_mask:
2442 case Intrinsic::riscv_vloxseg5_mask:
2443 case Intrinsic::riscv_vloxseg6_mask:
2444 case Intrinsic::riscv_vloxseg7_mask:
2445 case Intrinsic::riscv_vloxseg8_mask:
2446 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2447 /*IsOrdered*/ true);
2448 return;
2449 case Intrinsic::riscv_vluxseg2_mask:
2450 case Intrinsic::riscv_vluxseg3_mask:
2451 case Intrinsic::riscv_vluxseg4_mask:
2452 case Intrinsic::riscv_vluxseg5_mask:
2453 case Intrinsic::riscv_vluxseg6_mask:
2454 case Intrinsic::riscv_vluxseg7_mask:
2455 case Intrinsic::riscv_vluxseg8_mask:
2456 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2457 /*IsOrdered*/ false);
2458 return;
2459 case Intrinsic::riscv_vlseg8ff:
2460 case Intrinsic::riscv_vlseg7ff:
2461 case Intrinsic::riscv_vlseg6ff:
2462 case Intrinsic::riscv_vlseg5ff:
2463 case Intrinsic::riscv_vlseg4ff:
2464 case Intrinsic::riscv_vlseg3ff:
2465 case Intrinsic::riscv_vlseg2ff: {
2466 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2467 return;
2468 }
2469 case Intrinsic::riscv_vlseg8ff_mask:
2470 case Intrinsic::riscv_vlseg7ff_mask:
2471 case Intrinsic::riscv_vlseg6ff_mask:
2472 case Intrinsic::riscv_vlseg5ff_mask:
2473 case Intrinsic::riscv_vlseg4ff_mask:
2474 case Intrinsic::riscv_vlseg3ff_mask:
2475 case Intrinsic::riscv_vlseg2ff_mask: {
2476 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2477 return;
2478 }
2479 case Intrinsic::riscv_vloxei:
2480 case Intrinsic::riscv_vloxei_mask:
2481 case Intrinsic::riscv_vluxei:
2482 case Intrinsic::riscv_vluxei_mask: {
2483 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2484 IntNo == Intrinsic::riscv_vluxei_mask;
2485 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2486 IntNo == Intrinsic::riscv_vloxei_mask;
2487
2488 MVT VT = Node->getSimpleValueType(0);
2489 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2490
2491 unsigned CurOp = 2;
2492 SmallVector<SDValue, 8> Operands;
2493 Operands.push_back(Node->getOperand(CurOp++));
2494
2495 MVT IndexVT;
2496 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2497 /*IsStridedOrIndexed*/ true, Operands,
2498 /*IsLoad=*/true, &IndexVT);
2499
2501 "Element count mismatch");
2502
2505 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2506 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2507 reportFatalUsageError("The V extension does not support EEW=64 for "
2508 "index values when XLEN=32");
2509 }
2510 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2511 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2512 static_cast<unsigned>(IndexLMUL));
2513 MachineSDNode *Load =
2514 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2515
2516 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2517
2518 ReplaceNode(Node, Load);
2519 return;
2520 }
2521 case Intrinsic::riscv_vlm:
2522 case Intrinsic::riscv_vle:
2523 case Intrinsic::riscv_vle_mask:
2524 case Intrinsic::riscv_vlse:
2525 case Intrinsic::riscv_vlse_mask: {
2526 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2527 IntNo == Intrinsic::riscv_vlse_mask;
2528 bool IsStrided =
2529 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2530
2531 MVT VT = Node->getSimpleValueType(0);
2532 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2533
2534 // The riscv_vlm intrinsic are always tail agnostic and no passthru
2535 // operand at the IR level. In pseudos, they have both policy and
2536 // passthru operand. The passthru operand is needed to track the
2537 // "tail undefined" state, and the policy is there just for
2538 // consistency - it will always be "don't care" for the
2539 // unmasked form.
2540 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2541 unsigned CurOp = 2;
2542 SmallVector<SDValue, 8> Operands;
2543 if (HasPassthruOperand)
2544 Operands.push_back(Node->getOperand(CurOp++));
2545 else {
2546 // We eagerly lower to implicit_def (instead of undef), as we
2547 // otherwise fail to select nodes such as: nxv1i1 = undef
2548 SDNode *Passthru =
2549 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2550 Operands.push_back(SDValue(Passthru, 0));
2551 }
2552 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2553 Operands, /*IsLoad=*/true);
2554
2556 const RISCV::VLEPseudo *P =
2557 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2558 static_cast<unsigned>(LMUL));
2559 MachineSDNode *Load =
2560 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2561
2562 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2563
2564 ReplaceNode(Node, Load);
2565 return;
2566 }
2567 case Intrinsic::riscv_vleff:
2568 case Intrinsic::riscv_vleff_mask: {
2569 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2570
2571 MVT VT = Node->getSimpleValueType(0);
2572 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2573
2574 unsigned CurOp = 2;
2575 SmallVector<SDValue, 7> Operands;
2576 Operands.push_back(Node->getOperand(CurOp++));
2577 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2578 /*IsStridedOrIndexed*/ false, Operands,
2579 /*IsLoad=*/true);
2580
2582 const RISCV::VLEPseudo *P =
2583 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2584 Log2SEW, static_cast<unsigned>(LMUL));
2585 MachineSDNode *Load = CurDAG->getMachineNode(
2586 P->Pseudo, DL, Node->getVTList(), Operands);
2587 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2588
2589 ReplaceNode(Node, Load);
2590 return;
2591 }
2592 case Intrinsic::riscv_nds_vln:
2593 case Intrinsic::riscv_nds_vln_mask:
2594 case Intrinsic::riscv_nds_vlnu:
2595 case Intrinsic::riscv_nds_vlnu_mask: {
2596 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2597 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2598 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2599 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2600
2601 MVT VT = Node->getSimpleValueType(0);
2602 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2603 unsigned CurOp = 2;
2604 SmallVector<SDValue, 8> Operands;
2605
2606 Operands.push_back(Node->getOperand(CurOp++));
2607 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2608 /*IsStridedOrIndexed=*/false, Operands,
2609 /*IsLoad=*/true);
2610
2612 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2613 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2614 MachineSDNode *Load =
2615 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2616
2617 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2618 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2619
2620 ReplaceNode(Node, Load);
2621 return;
2622 }
2623 }
2624 break;
2625 }
2626 case ISD::INTRINSIC_VOID: {
2627 unsigned IntNo = Node->getConstantOperandVal(1);
2628 switch (IntNo) {
2629 case Intrinsic::riscv_vsseg2:
2630 case Intrinsic::riscv_vsseg3:
2631 case Intrinsic::riscv_vsseg4:
2632 case Intrinsic::riscv_vsseg5:
2633 case Intrinsic::riscv_vsseg6:
2634 case Intrinsic::riscv_vsseg7:
2635 case Intrinsic::riscv_vsseg8: {
2636 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2637 /*IsStrided*/ false);
2638 return;
2639 }
2640 case Intrinsic::riscv_vsseg2_mask:
2641 case Intrinsic::riscv_vsseg3_mask:
2642 case Intrinsic::riscv_vsseg4_mask:
2643 case Intrinsic::riscv_vsseg5_mask:
2644 case Intrinsic::riscv_vsseg6_mask:
2645 case Intrinsic::riscv_vsseg7_mask:
2646 case Intrinsic::riscv_vsseg8_mask: {
2647 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2648 /*IsStrided*/ false);
2649 return;
2650 }
2651 case Intrinsic::riscv_vssseg2:
2652 case Intrinsic::riscv_vssseg3:
2653 case Intrinsic::riscv_vssseg4:
2654 case Intrinsic::riscv_vssseg5:
2655 case Intrinsic::riscv_vssseg6:
2656 case Intrinsic::riscv_vssseg7:
2657 case Intrinsic::riscv_vssseg8: {
2658 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2659 /*IsStrided*/ true);
2660 return;
2661 }
2662 case Intrinsic::riscv_vssseg2_mask:
2663 case Intrinsic::riscv_vssseg3_mask:
2664 case Intrinsic::riscv_vssseg4_mask:
2665 case Intrinsic::riscv_vssseg5_mask:
2666 case Intrinsic::riscv_vssseg6_mask:
2667 case Intrinsic::riscv_vssseg7_mask:
2668 case Intrinsic::riscv_vssseg8_mask: {
2669 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2670 /*IsStrided*/ true);
2671 return;
2672 }
2673 case Intrinsic::riscv_vsoxseg2:
2674 case Intrinsic::riscv_vsoxseg3:
2675 case Intrinsic::riscv_vsoxseg4:
2676 case Intrinsic::riscv_vsoxseg5:
2677 case Intrinsic::riscv_vsoxseg6:
2678 case Intrinsic::riscv_vsoxseg7:
2679 case Intrinsic::riscv_vsoxseg8:
2680 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2681 /*IsOrdered*/ true);
2682 return;
2683 case Intrinsic::riscv_vsuxseg2:
2684 case Intrinsic::riscv_vsuxseg3:
2685 case Intrinsic::riscv_vsuxseg4:
2686 case Intrinsic::riscv_vsuxseg5:
2687 case Intrinsic::riscv_vsuxseg6:
2688 case Intrinsic::riscv_vsuxseg7:
2689 case Intrinsic::riscv_vsuxseg8:
2690 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2691 /*IsOrdered*/ false);
2692 return;
2693 case Intrinsic::riscv_vsoxseg2_mask:
2694 case Intrinsic::riscv_vsoxseg3_mask:
2695 case Intrinsic::riscv_vsoxseg4_mask:
2696 case Intrinsic::riscv_vsoxseg5_mask:
2697 case Intrinsic::riscv_vsoxseg6_mask:
2698 case Intrinsic::riscv_vsoxseg7_mask:
2699 case Intrinsic::riscv_vsoxseg8_mask:
2700 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2701 /*IsOrdered*/ true);
2702 return;
2703 case Intrinsic::riscv_vsuxseg2_mask:
2704 case Intrinsic::riscv_vsuxseg3_mask:
2705 case Intrinsic::riscv_vsuxseg4_mask:
2706 case Intrinsic::riscv_vsuxseg5_mask:
2707 case Intrinsic::riscv_vsuxseg6_mask:
2708 case Intrinsic::riscv_vsuxseg7_mask:
2709 case Intrinsic::riscv_vsuxseg8_mask:
2710 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2711 /*IsOrdered*/ false);
2712 return;
2713 case Intrinsic::riscv_vsoxei:
2714 case Intrinsic::riscv_vsoxei_mask:
2715 case Intrinsic::riscv_vsuxei:
2716 case Intrinsic::riscv_vsuxei_mask: {
2717 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2718 IntNo == Intrinsic::riscv_vsuxei_mask;
2719 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2720 IntNo == Intrinsic::riscv_vsoxei_mask;
2721
2722 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2723 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2724
2725 unsigned CurOp = 2;
2726 SmallVector<SDValue, 8> Operands;
2727 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2728
2729 MVT IndexVT;
2730 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2731 /*IsStridedOrIndexed*/ true, Operands,
2732 /*IsLoad=*/false, &IndexVT);
2733
2735 "Element count mismatch");
2736
2739 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2740 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2741 reportFatalUsageError("The V extension does not support EEW=64 for "
2742 "index values when XLEN=32");
2743 }
2744 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2745 IsMasked, IsOrdered, IndexLog2EEW,
2746 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2747 MachineSDNode *Store =
2748 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2749
2750 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2751
2752 ReplaceNode(Node, Store);
2753 return;
2754 }
2755 case Intrinsic::riscv_vsm:
2756 case Intrinsic::riscv_vse:
2757 case Intrinsic::riscv_vse_mask:
2758 case Intrinsic::riscv_vsse:
2759 case Intrinsic::riscv_vsse_mask: {
2760 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2761 IntNo == Intrinsic::riscv_vsse_mask;
2762 bool IsStrided =
2763 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2764
2765 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2766 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2767
2768 unsigned CurOp = 2;
2769 SmallVector<SDValue, 8> Operands;
2770 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2771
2772 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2773 Operands);
2774
2776 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2777 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2778 MachineSDNode *Store =
2779 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2780 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2781
2782 ReplaceNode(Node, Store);
2783 return;
2784 }
2785 case Intrinsic::riscv_sf_vc_x_se:
2786 case Intrinsic::riscv_sf_vc_i_se:
2788 return;
2789 case Intrinsic::riscv_sf_vlte8:
2790 case Intrinsic::riscv_sf_vlte16:
2791 case Intrinsic::riscv_sf_vlte32:
2792 case Intrinsic::riscv_sf_vlte64: {
2793 unsigned Log2SEW;
2794 unsigned PseudoInst;
2795 switch (IntNo) {
2796 case Intrinsic::riscv_sf_vlte8:
2797 PseudoInst = RISCV::PseudoSF_VLTE8;
2798 Log2SEW = 3;
2799 break;
2800 case Intrinsic::riscv_sf_vlte16:
2801 PseudoInst = RISCV::PseudoSF_VLTE16;
2802 Log2SEW = 4;
2803 break;
2804 case Intrinsic::riscv_sf_vlte32:
2805 PseudoInst = RISCV::PseudoSF_VLTE32;
2806 Log2SEW = 5;
2807 break;
2808 case Intrinsic::riscv_sf_vlte64:
2809 PseudoInst = RISCV::PseudoSF_VLTE64;
2810 Log2SEW = 6;
2811 break;
2812 }
2813
2814 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2815 SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT);
2816 SDValue Operands[] = {Node->getOperand(2),
2817 Node->getOperand(3),
2818 Node->getOperand(4),
2819 SEWOp,
2820 TWidenOp,
2821 Node->getOperand(0)};
2822
2823 MachineSDNode *TileLoad =
2824 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2825 CurDAG->setNodeMemRefs(TileLoad,
2826 {cast<MemSDNode>(Node)->getMemOperand()});
2827
2828 ReplaceNode(Node, TileLoad);
2829 return;
2830 }
2831 case Intrinsic::riscv_sf_mm_s_s:
2832 case Intrinsic::riscv_sf_mm_s_u:
2833 case Intrinsic::riscv_sf_mm_u_s:
2834 case Intrinsic::riscv_sf_mm_u_u:
2835 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2836 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2837 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2838 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2839 case Intrinsic::riscv_sf_mm_f_f: {
2840 bool HasFRM = false;
2841 unsigned PseudoInst;
2842 switch (IntNo) {
2843 case Intrinsic::riscv_sf_mm_s_s:
2844 PseudoInst = RISCV::PseudoSF_MM_S_S;
2845 break;
2846 case Intrinsic::riscv_sf_mm_s_u:
2847 PseudoInst = RISCV::PseudoSF_MM_S_U;
2848 break;
2849 case Intrinsic::riscv_sf_mm_u_s:
2850 PseudoInst = RISCV::PseudoSF_MM_U_S;
2851 break;
2852 case Intrinsic::riscv_sf_mm_u_u:
2853 PseudoInst = RISCV::PseudoSF_MM_U_U;
2854 break;
2855 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2856 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2857 HasFRM = true;
2858 break;
2859 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2860 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2861 HasFRM = true;
2862 break;
2863 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2864 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2865 HasFRM = true;
2866 break;
2867 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2868 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2869 HasFRM = true;
2870 break;
2871 case Intrinsic::riscv_sf_mm_f_f:
2872 if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16)
2873 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2874 else
2875 PseudoInst = RISCV::PseudoSF_MM_F_F;
2876 HasFRM = true;
2877 break;
2878 }
2879 uint64_t TileNum = Node->getConstantOperandVal(2);
2880 SDValue Op1 = Node->getOperand(3);
2881 SDValue Op2 = Node->getOperand(4);
2882 MVT VT = Op1->getSimpleValueType(0);
2883 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2884 SDValue TmOp = Node->getOperand(5);
2885 SDValue TnOp = Node->getOperand(6);
2886 SDValue TkOp = Node->getOperand(7);
2887 SDValue TWidenOp = Node->getOperand(8);
2888 SDValue Chain = Node->getOperand(0);
2889
2890 // sf.mm.f.f with sew=32, twiden=2 is invalid
2891 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2892 TWidenOp->getAsZExtVal() == 2)
2893 reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)");
2894
2895 SmallVector<SDValue, 10> Operands(
2896 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2});
2897 if (HasFRM)
2898 Operands.push_back(
2899 CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT));
2900 Operands.append({TmOp, TnOp, TkOp,
2901 CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp,
2902 Chain});
2903
2904 auto *NewNode =
2905 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2906
2907 ReplaceNode(Node, NewNode);
2908 return;
2909 }
2910 case Intrinsic::riscv_sf_vtzero_t: {
2911 uint64_t TileNum = Node->getConstantOperandVal(2);
2912 SDValue Tm = Node->getOperand(3);
2913 SDValue Tn = Node->getOperand(4);
2914 SDValue Log2SEW = Node->getOperand(5);
2915 SDValue TWiden = Node->getOperand(6);
2916 SDValue Chain = Node->getOperand(0);
2917 auto *NewNode = CurDAG->getMachineNode(
2918 RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(),
2919 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW,
2920 TWiden, Chain});
2921
2922 ReplaceNode(Node, NewNode);
2923 return;
2924 }
2925 }
2926 break;
2927 }
2928 case ISD::BITCAST: {
2929 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2930 // Just drop bitcasts between vectors if both are fixed or both are
2931 // scalable.
2932 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2933 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2934 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2935 CurDAG->RemoveDeadNode(Node);
2936 return;
2937 }
2938 if (Subtarget->hasStdExtP()) {
2939 bool Is32BitCast =
2940 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2941 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2942 bool Is64BitCast =
2943 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2944 SrcVT == MVT::v2i32)) ||
2945 (SrcVT == MVT::i64 &&
2946 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2947 if (Is32BitCast || Is64BitCast) {
2948 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2949 CurDAG->RemoveDeadNode(Node);
2950 return;
2951 }
2952 }
2953 break;
2954 }
2955 case ISD::SPLAT_VECTOR: {
2956 if (!Subtarget->hasStdExtP())
2957 break;
2958 auto *ConstNode = dyn_cast<ConstantSDNode>(Node->getOperand(0));
2959 if (!ConstNode)
2960 break;
2961
2962 if (ConstNode->isZero()) {
2963 SDValue New =
2964 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
2965 ReplaceNode(Node, New.getNode());
2966 return;
2967 }
2968
2969 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
2970 APInt Val = ConstNode->getAPIntValue().trunc(EltSize);
2971
2972 // Use LI for all ones since it can be compressed to c.li.
2973 if (Val.isAllOnes()) {
2974 SDNode *NewNode = CurDAG->getMachineNode(
2975 RISCV::ADDI, DL, VT, CurDAG->getRegister(RISCV::X0, VT),
2976 CurDAG->getAllOnesConstant(DL, XLenVT, /*IsTarget=*/true));
2977 ReplaceNode(Node, NewNode);
2978 return;
2979 }
2980
2981 // Find the smallest splat.
2982 if (Val.getBitWidth() > 16 && Val.isSplat(16))
2983 Val = Val.trunc(16);
2984 if (Val.getBitWidth() > 8 && Val.isSplat(8))
2985 Val = Val.trunc(8);
2986
2987 EltSize = Val.getBitWidth();
2988 int64_t Imm = Val.getSExtValue();
2989
2990 unsigned Opc = 0;
2991 if (EltSize == 8) {
2992 Opc = RISCV::PLI_B;
2993 } else if (isInt<10>(Imm)) {
2994 Opc = EltSize == 32 ? RISCV::PLI_W : RISCV::PLI_H;
2995 } else if (EltSize == 16 && isShiftedInt<10, 6>(Imm)) {
2996 Opc = RISCV::PLUI_H;
2997 Imm = Imm >> 6;
2998 } else if (EltSize == 32 && isShiftedInt<10, 22>(Imm)) {
2999 Opc = RISCV::PLUI_W;
3000 Imm = Imm >> 22;
3001 }
3002
3003 if (Opc) {
3004 SDNode *NewNode = CurDAG->getMachineNode(
3005 Opc, DL, VT, CurDAG->getSignedTargetConstant(Imm, DL, XLenVT));
3006 ReplaceNode(Node, NewNode);
3007 return;
3008 }
3009
3010 break;
3011 }
3013 if (Subtarget->hasStdExtP()) {
3014 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
3015 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
3016 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
3017 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
3018 CurDAG->RemoveDeadNode(Node);
3019 return;
3020 }
3021 }
3022 break;
3024 case RISCVISD::TUPLE_INSERT: {
3025 SDValue V = Node->getOperand(0);
3026 SDValue SubV = Node->getOperand(1);
3027 SDLoc DL(SubV);
3028 auto Idx = Node->getConstantOperandVal(2);
3029 MVT SubVecVT = SubV.getSimpleValueType();
3030
3031 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3032 MVT SubVecContainerVT = SubVecVT;
3033 // Establish the correct scalable-vector types for any fixed-length type.
3034 if (SubVecVT.isFixedLengthVector()) {
3035 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
3037 [[maybe_unused]] bool ExactlyVecRegSized =
3038 Subtarget->expandVScale(SubVecVT.getSizeInBits())
3039 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
3040 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
3041 .getKnownMinValue()));
3042 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
3043 }
3044 MVT ContainerVT = VT;
3045 if (VT.isFixedLengthVector())
3046 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
3047
3048 const auto *TRI = Subtarget->getRegisterInfo();
3049 unsigned SubRegIdx;
3050 std::tie(SubRegIdx, Idx) =
3052 ContainerVT, SubVecContainerVT, Idx, TRI);
3053
3054 // If the Idx hasn't been completely eliminated then this is a subvector
3055 // insert which doesn't naturally align to a vector register. These must
3056 // be handled using instructions to manipulate the vector registers.
3057 if (Idx != 0)
3058 break;
3059
3060 RISCVVType::VLMUL SubVecLMUL =
3061 RISCVTargetLowering::getLMUL(SubVecContainerVT);
3062 [[maybe_unused]] bool IsSubVecPartReg =
3063 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
3064 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
3065 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
3066 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
3067 V.isUndef()) &&
3068 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
3069 "the subvector is smaller than a full-sized register");
3070
3071 // If we haven't set a SubRegIdx, then we must be going between
3072 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
3073 if (SubRegIdx == RISCV::NoSubRegister) {
3074 unsigned InRegClassID =
3077 InRegClassID &&
3078 "Unexpected subvector extraction");
3079 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
3080 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
3081 DL, VT, SubV, RC);
3082 ReplaceNode(Node, NewNode);
3083 return;
3084 }
3085
3086 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
3087 ReplaceNode(Node, Insert.getNode());
3088 return;
3089 }
3091 case RISCVISD::TUPLE_EXTRACT: {
3092 SDValue V = Node->getOperand(0);
3093 auto Idx = Node->getConstantOperandVal(1);
3094 MVT InVT = V.getSimpleValueType();
3095 SDLoc DL(V);
3096
3097 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
3098 MVT SubVecContainerVT = VT;
3099 // Establish the correct scalable-vector types for any fixed-length type.
3100 if (VT.isFixedLengthVector()) {
3101 assert(Idx == 0);
3102 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
3103 }
3104 if (InVT.isFixedLengthVector())
3105 InVT = TLI.getContainerForFixedLengthVector(InVT);
3106
3107 const auto *TRI = Subtarget->getRegisterInfo();
3108 unsigned SubRegIdx;
3109 std::tie(SubRegIdx, Idx) =
3111 InVT, SubVecContainerVT, Idx, TRI);
3112
3113 // If the Idx hasn't been completely eliminated then this is a subvector
3114 // extract which doesn't naturally align to a vector register. These must
3115 // be handled using instructions to manipulate the vector registers.
3116 if (Idx != 0)
3117 break;
3118
3119 // If we haven't set a SubRegIdx, then we must be going between
3120 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
3121 if (SubRegIdx == RISCV::NoSubRegister) {
3122 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
3124 InRegClassID &&
3125 "Unexpected subvector extraction");
3126 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
3127 SDNode *NewNode =
3128 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
3129 ReplaceNode(Node, NewNode);
3130 return;
3131 }
3132
3133 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
3134 ReplaceNode(Node, Extract.getNode());
3135 return;
3136 }
3137 case RISCVISD::VMV_S_X_VL:
3138 case RISCVISD::VFMV_S_F_VL:
3139 case RISCVISD::VMV_V_X_VL:
3140 case RISCVISD::VFMV_V_F_VL: {
3141 // Try to match splat of a scalar load to a strided load with stride of x0.
3142 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
3143 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
3144 if (!Node->getOperand(0).isUndef())
3145 break;
3146 SDValue Src = Node->getOperand(1);
3147 auto *Ld = dyn_cast<LoadSDNode>(Src);
3148 // Can't fold load update node because the second
3149 // output is used so that load update node can't be removed.
3150 if (!Ld || Ld->isIndexed())
3151 break;
3152 EVT MemVT = Ld->getMemoryVT();
3153 // The memory VT should be the same size as the element type.
3154 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
3155 break;
3156 if (!IsProfitableToFold(Src, Node, Node) ||
3157 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
3158 break;
3159
3160 SDValue VL;
3161 if (IsScalarMove) {
3162 // We could deal with more VL if we update the VSETVLI insert pass to
3163 // avoid introducing more VSETVLI.
3164 if (!isOneConstant(Node->getOperand(2)))
3165 break;
3166 selectVLOp(Node->getOperand(2), VL);
3167 } else
3168 selectVLOp(Node->getOperand(2), VL);
3169
3170 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
3171 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
3172
3173 // If VL=1, then we don't need to do a strided load and can just do a
3174 // regular load.
3175 bool IsStrided = !isOneConstant(VL);
3176
3177 // Only do a strided load if we have optimized zero-stride vector load.
3178 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
3179 break;
3180
3181 SmallVector<SDValue> Operands = {
3182 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
3183 Ld->getBasePtr()};
3184 if (IsStrided)
3185 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
3187 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
3188 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
3189
3191 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
3192 /*IsMasked*/ false, IsStrided, /*FF*/ false,
3193 Log2SEW, static_cast<unsigned>(LMUL));
3194 MachineSDNode *Load =
3195 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
3196 // Update the chain.
3197 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
3198 // Record the mem-refs
3199 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
3200 // Replace the splat with the vlse.
3201 ReplaceNode(Node, Load);
3202 return;
3203 }
3204 case ISD::PREFETCH:
3205 // MIPS's prefetch instruction already encodes the hint within the
3206 // instruction itself, so no extra NTL hint is needed.
3207 if (Subtarget->hasVendorXMIPSCBOP())
3208 break;
3209
3210 unsigned Locality = Node->getConstantOperandVal(3);
3211 if (Locality > 2)
3212 break;
3213
3214 auto *LoadStoreMem = cast<MemSDNode>(Node);
3215 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
3217
3218 int NontemporalLevel = 0;
3219 switch (Locality) {
3220 case 0:
3221 NontemporalLevel = 3; // NTL.ALL
3222 break;
3223 case 1:
3224 NontemporalLevel = 1; // NTL.PALL
3225 break;
3226 case 2:
3227 NontemporalLevel = 0; // NTL.P1
3228 break;
3229 default:
3230 llvm_unreachable("unexpected locality value.");
3231 }
3232
3233 if (NontemporalLevel & 0b1)
3235 if (NontemporalLevel & 0b10)
3237 break;
3238 }
3239
3240 // Select the default instruction.
3241 SelectCode(Node);
3242}
3243
// NOTE(review): doxygen-extraction artifact — the signature line (original
// 3244, `bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(`) and the
// `case InlineAsm::ConstraintCode::...` labels (original 3250-3251, 3259)
// are missing from this view; presumably they are the memory-constraint
// cases handled below — TODO confirm against upstream source.
// Lowers an inline-asm memory operand into the (base register, immediate
// offset) pair that RISCVAsmPrinter::PrintAsmMemoryOperand expects.
// Returns false on success per the SelectionDAGISel contract.
3245    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
3246    std::vector<SDValue> &OutOps) {
3247  // Always produce a register and immediate operand, as expected by
3248  // RISCVAsmPrinter::PrintAsmMemoryOperand.
3249  switch (ConstraintID) {
    // (case label lost in extraction) Full reg+imm addressing: let
    // SelectAddrRegImm split Op into base and simm12 offset.
3252    SDValue Op0, Op1;
3253    [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
3254    assert(Found && "SelectAddrRegImm should always succeed");
3255    OutOps.push_back(Op0);
3256    OutOps.push_back(Op1);
3257    return false;
3258  }
    // (case label lost in extraction) Register-only addressing: emit the
    // operand as the base with a constant 0 offset.
3260    OutOps.push_back(Op);
3261    OutOps.push_back(
3262        CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
3263    return false;
3264  default:
3265    report_fatal_error("Unexpected asm memory constraint " +
3266                       InlineAsm::getMemConstraintName(ConstraintID));
3267  }
3268
3269  return true;
3270}
3271
// NOTE(review): the signature's first line (original 3272) is missing from
// this extraction; the visible body matches
// RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, ...) — confirm.
// Matches a bare frame index as a (target-FI base, 0 offset) address pair.
// Returns false for anything that is not a FrameIndexSDNode.
3273                                            SDValue &Offset) {
3274  if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3275    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
3276    Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
3277    return true;
3278  }
3279
3280  return false;
3281}
3282
// NOTE(review): one parameter line (original 3286, presumably
// `SDValue Addr, SDValue &Base, SDValue &Offset,`) was lost in extraction —
// the body below reads Addr and writes Base/Offset, so those parameters
// must exist; confirm against upstream.
3283// Fold constant addresses.
// Tries to select a constant address as (Base, simm12 Offset):
//  - simm12 constants: X0 base + whole constant as offset;
//  - LUI-reachable constants: LUI(Hi20) base + Lo12 offset;
//  - otherwise asks RISCVMatInt for a materialization sequence and folds a
//    trailing ADDI's immediate as the offset.
// When IsPrefetch is set, the low 5 bits of the folded offset must be zero
// (prefetch instructions require a 32-byte-aligned offset encoding).
3284static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3285                               const MVT VT, const RISCVSubtarget *Subtarget,
3287                               bool IsPrefetch = false) {
3288  if (!isa<ConstantSDNode>(Addr))
3289    return false;
3290
3291  int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
3292
3293  // If the constant is a simm12, we can fold the whole constant and use X0 as
3294  // the base. If the constant can be materialized with LUI+simm12, use LUI as
3295  // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
3296  int64_t Lo12 = SignExtend64<12>(CVal);
3297  int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3298  if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
3299    if (IsPrefetch && (Lo12 & 0b11111) != 0)
3300      return false;
3301    if (Hi) {
3302      int64_t Hi20 = (Hi >> 12) & 0xfffff;
3303      Base = SDValue(
3304          CurDAG->getMachineNode(RISCV::LUI, DL, VT,
3305                                 CurDAG->getTargetConstant(Hi20, DL, VT)),
3306          0);
3307    } else {
3308      Base = CurDAG->getRegister(RISCV::X0, VT);
3309    }
3310    Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3311    return true;
3312  }
3313
3314  // Ask how constant materialization would handle this constant.
3315  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
3316
3317  // If the last instruction would be an ADDI, we can fold its immediate and
3318  // emit the rest of the sequence as the base.
3319  if (Seq.back().getOpcode() != RISCV::ADDI)
3320    return false;
3321  Lo12 = Seq.back().getImm();
3322  if (IsPrefetch && (Lo12 & 0b11111) != 0)
3323    return false;
3324
3325  // Drop the last instruction.
3326  Seq.pop_back();
3327  assert(!Seq.empty() && "Expected more instructions in sequence");
3328
3329  Base = selectImmSeq(CurDAG, DL, VT, Seq);
3330  Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3331  return true;
3332}
3333
3334// Is this ADD instruction only used as the base pointer of scalar loads and
3335// stores?
// NOTE(review): the signature line (original 3336, presumably
// `static bool isWorthFoldingAdd(SDValue Add) {`) is missing from this
// extraction — the body iterates Add->users(), so Add is the parameter.
// Returns true only when every user is a scalar-integer/f16/f32/f64
// load/store (including RV32 paired LD/SD and monotonic-or-weaker atomics)
// that uses Add strictly as an address, never as a stored value.
3337  for (auto *User : Add->users()) {
3338    if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3339        User->getOpcode() != RISCVISD::LD_RV32 &&
3340        User->getOpcode() != RISCVISD::SD_RV32 &&
3341        User->getOpcode() != ISD::ATOMIC_LOAD &&
3342        User->getOpcode() != ISD::ATOMIC_STORE)
3343      return false;
3344    EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3345    if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3346        VT != MVT::f64)
3347      return false;
3348    // Don't allow stores of the value. It must be used as the address.
3349    if (User->getOpcode() == ISD::STORE &&
3350        cast<StoreSDNode>(User)->getValue() == Add)
3351      return false;
3352    if (User->getOpcode() == ISD::ATOMIC_STORE &&
3353        cast<AtomicSDNode>(User)->getVal() == Add)
3354      return false;
3355    if (User->getOpcode() == RISCVISD::SD_RV32 &&
3356        (User->getOperand(0) == Add || User->getOperand(1) == Add))
3357      return false;
    // Stronger-than-monotonic orderings need fences/reserved sequences and
    // can't fold an offset into the address.
3358    if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
3359      return false;
3360  }
3361
3362  return true;
3363}
3364
// NOTE(review): the signature line (original 3365) is missing from this
// extraction — the body uses `User` and `Add`, so this is presumably
// `static bool isRegImmLoadOrStore(SDNode *User, SDValue Add)`; confirm.
// Returns true when User is a load/store (including RV32 paired LD/SD and
// atomic load/store) that uses Add only as its address operand, never as
// the value being stored.
3366  switch (User->getOpcode()) {
3367  default:
3368    return false;
3369  case ISD::LOAD:
3370  case RISCVISD::LD_RV32:
3371  case ISD::ATOMIC_LOAD:
3372    break;
3373  case ISD::STORE:
3374    // Don't allow stores of Add. It must only be used as the address.
3375    if (cast<StoreSDNode>(User)->getValue() == Add)
3376      return false;
3377    break;
3378  case RISCVISD::SD_RV32:
3379    // Don't allow stores of Add. It must only be used as the address.
3380    if (User->getOperand(0) == Add || User->getOperand(1) == Add)
3381      return false;
3382    break;
3383  case ISD::ATOMIC_STORE:
3384    // Don't allow stores of Add. It must only be used as the address.
3385    if (cast<AtomicSDNode>(User)->getVal() == Add)
3386      return false;
3387    break;
3388  }
3389
3390  return true;
3391}
3392
3393// To prevent SelectAddrRegImm from folding offsets that conflict with the
3394// fusion of PseudoMovAddr, check if the offset of every use of a given address
3395// is within the alignment.
// NOTE(review): the signature line (original 3396) is missing from this
// extraction; the body takes an ADD_LO `Addr` and an `Alignment` (visible
// continuation below) and walks Addr->users(). Every user must either be a
// direct reg+imm load/store (offset 0) or an add-with-constant whose simm12
// offset stays below the alignment and whose own users are all foldable
// load/stores.
3397                                                     Align Alignment) {
3398  assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3399  for (auto *User : Addr->users()) {
3400    // If the user is a load or store, then the offset is 0 which is always
3401    // within alignment.
3402    if (isRegImmLoadOrStore(User, Addr))
3403      continue;
3404
3405    if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
3406      int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3407      if (!isInt<12>(CVal) || Alignment <= CVal)
3408        return false;
3409
3410      // Make sure all uses are foldable load/stores.
3411      for (auto *AddUser : User->users())
3412        if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
3413          return false;
3414
3415      continue;
3416    }
3417
3418    return false;
3419  }
3420
3421  return true;
3422}
3423
// NOTE(review): the signature's first line (original 3424, presumably
// `bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,`)
// is missing from this extraction.
// Selects Addr into a (Base, simm12 Offset) pair for reg+imm load/store
// addressing. Tries, in order: frame index, ADD_LO (lui/auipc %lo fusion,
// guarded by global alignment so the low part cannot overflow), base +
// simm12 constant, ADD with a large immediate (AddiPair split or partial
// constant-materialization fold), a fully-constant address, and finally
// falls back to (Addr, 0). Always returns true.
3425                                         SDValue &Offset) {
3426  if (SelectAddrFrameIndex(Addr, Base, Offset))
3427    return true;
3428
3429  SDLoc DL(Addr);
3430  MVT VT = Addr.getSimpleValueType();
3431
3432  if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3433    bool CanFold = true;
3434    // Unconditionally fold if operand 1 is not a global address (e.g.
3435    // externsymbol)
3436    if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
3437      const DataLayout &DL = CurDAG->getDataLayout();
3438      Align Alignment = commonAlignment(
3439          GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3440      if (!areOffsetsWithinAlignment(Addr, Alignment))
3441        CanFold = false;
3442    }
3443    if (CanFold) {
3444      Base = Addr.getOperand(0);
3445      Offset = Addr.getOperand(1);
3446      return true;
3447    }
3448  }
3449
3450  if (CurDAG->isBaseWithConstantOffset(Addr)) {
3451    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3452    if (isInt<12>(CVal)) {
3453      Base = Addr.getOperand(0);
3454      if (Base.getOpcode() == RISCVISD::ADD_LO) {
3455        SDValue LoOperand = Base.getOperand(1);
3456        if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
3457          // If the Lo in (ADD_LO hi, lo) is a global variable's address
3458          // (its low part, really), then we can rely on the alignment of that
3459          // variable to provide a margin of safety before low part can overflow
3460          // the 12 bits of the load/store offset. Check if CVal falls within
3461          // that margin; if so (low part + CVal) can't overflow.
3462          const DataLayout &DL = CurDAG->getDataLayout();
3463          Align Alignment = commonAlignment(
3464              GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3465          if ((CVal == 0 || Alignment > CVal) &&
3466              areOffsetsWithinAlignment(Base, Alignment)) {
3467            int64_t CombinedOffset = CVal + GA->getOffset();
3468            Base = Base.getOperand(0);
3469            Offset = CurDAG->getTargetGlobalAddress(
3470                GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
3471                CombinedOffset, GA->getTargetFlags());
3472            return true;
3473          }
3474        }
3475      }
3476
3477      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3478        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3479      Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3480      return true;
3481    }
3482  }
3483
3484  // Handle ADD with large immediates.
3485  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3486    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3487    assert(!isInt<12>(CVal) && "simm12 not already handled?");
3488
3489    // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3490    // an ADDI for part of the offset and fold the rest into the load/store.
3491    // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
3492    if (CVal >= -4096 && CVal <= 4094) {
3493      int64_t Adj = CVal < 0 ? -2048 : 2047;
3494      Base = SDValue(
3495          CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3496                                 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3497          0);
3498      Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3499      return true;
3500    }
3501
3502    // For larger immediates, we might be able to save one instruction from
3503    // constant materialization by folding the Lo12 bits of the immediate into
3504    // the address. We should only do this if the ADD is only used by loads and
3505    // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3506    // separately with the full materialized immediate creating extra
3507    // instructions.
3508    if (isWorthFoldingAdd(Addr) &&
3509        selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3510                           Offset, /*IsPrefetch=*/false)) {
3511      // Insert an ADD instruction with the materialized Hi52 bits.
3512      Base = SDValue(
3513          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3514          0);
3515      return true;
3516    }
3517  }
3518
3519  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3520                         /*IsPrefetch=*/false))
3521    return true;
3522
3523  Base = Addr;
3524  Offset = CurDAG->getTargetConstant(0, DL, VT);
3525  return true;
3526}
3527
3528/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
// NOTE(review): the signature's first line (original 3529) is missing from
// this extraction. Selects (Base, Offset) where Offset is an unsigned
// 9-bit constant, else falls back to (Addr, 0). Always returns true.
3530                                          SDValue &Offset) {
3531  if (SelectAddrFrameIndex(Addr, Base, Offset))
3532    return true;
3533
3534  SDLoc DL(Addr);
3535  MVT VT = Addr.getSimpleValueType();
3536
3537  if (CurDAG->isBaseWithConstantOffset(Addr)) {
3538    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3539    if (isUInt<9>(CVal)) {
3540      Base = Addr.getOperand(0);
3541
3542      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3543        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3544      Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3545      return true;
3546    }
3547  }
3548
3549  Base = Addr;
3550  Offset = CurDAG->getTargetConstant(0, DL, VT);
3551  return true;
3552}
3553
3554/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3555/// Offset should be all zeros.
// NOTE(review): the signature's first line (original 3556) is missing from
// this extraction. Used for prefetch-style addressing where the offset
// encoding requires 32-byte granularity; both selectConstantAddr calls
// below pass IsPrefetch=true to enforce that. Always returns true.
3557                                                SDValue &Offset) {
3558  if (SelectAddrFrameIndex(Addr, Base, Offset))
3559    return true;
3560
3561  SDLoc DL(Addr);
3562  MVT VT = Addr.getSimpleValueType();
3563
3564  if (CurDAG->isBaseWithConstantOffset(Addr)) {
3565    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3566    if (isInt<12>(CVal)) {
3567      Base = Addr.getOperand(0);
3568
3569      // Early-out if not a valid offset.
3570      if ((CVal & 0b11111) != 0) {
3571        Base = Addr;
3572        Offset = CurDAG->getTargetConstant(0, DL, VT);
3573        return true;
3574      }
3575
3576      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3577        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3578      Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3579      return true;
3580    }
3581  }
3582
3583  // Handle ADD with large immediates.
3584  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3585    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3586    assert(!isInt<12>(CVal) && "simm12 not already handled?");
3587
3588    // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3589    // one instruction by folding adjustment (-2048 or 2016) into the address.
3590    if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3591      int64_t Adj = CVal < 0 ? -2048 : 2016;
3592      int64_t AdjustedOffset = CVal - Adj;
3593      Base =
3594          SDValue(CurDAG->getMachineNode(
3595                      RISCV::ADDI, DL, VT, Addr.getOperand(0),
3596                      CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3597                  0);
3598      Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3599      return true;
3600    }
3601
3602    if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3603                           Offset, /*IsPrefetch=*/true)) {
3604      // Insert an ADD instruction with the materialized Hi52 bits.
3605      Base = SDValue(
3606          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3607          0);
3608      return true;
3609    }
3610  }
3611
3612  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3613                         /*IsPrefetch=*/true))
3614    return true;
3615
3616  Base = Addr;
3617  Offset = CurDAG->getTargetConstant(0, DL, VT);
3618  return true;
3619}
3620
3621/// Return true if this a load/store that we have a RegRegScale instruction for.
// NOTE(review): the signature line (original 3622) is missing from this
// extraction; the body uses `User`, `Add`, and `Subtarget`. The memory VT
// must be scalar-integer (XTHeadMemIdx or Xqcisls) or f32/f64
// (XTHeadFMemIdx), and Add must be the address, not the stored value.
3623                                     const RISCVSubtarget &Subtarget) {
3624  if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3625    return false;
3626  EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3627  if (!(VT.isScalarInteger() &&
3628        (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3629      !((VT == MVT::f32 || VT == MVT::f64) &&
3630        Subtarget.hasVendorXTHeadFMemIdx()))
3631    return false;
3632  // Don't allow stores of the value. It must be used as the address.
3633  if (User->getOpcode() == ISD::STORE &&
3634      cast<StoreSDNode>(User)->getValue() == Add)
3635    return false;
3636
3637  return true;
3638}
3639
3640/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3641/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3642/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3643/// single addi and we don't have a SHXADD instruction we could use.
3644/// FIXME: May still need to check how many and what kind of users the SHL has.
// NOTE(review): the signature's first line (original 3645, presumably
// `static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,`)
// is missing from this extraction. Also note original line 3655 (part of
// the ADDI-user check condition) was dropped between L3535 and L3536.
3646                                          SDValue Add,
3647                                          SDValue Shift = SDValue()) {
3648  bool FoundADDI = false;
3649  for (auto *User : Add->users()) {
3650    if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3651      continue;
3652
3653    // Allow a single ADDI that is used by loads/stores if we matched a shift.
3654    if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3656        !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3657      return false;
3658
3659    FoundADDI = true;
3660
3661    // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3662    assert(Shift.getOpcode() == ISD::SHL);
3663    unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3664    if (Subtarget.hasShlAdd(ShiftAmt))
3665      return false;
3666
3667    // All users of the ADDI should be load/store.
3668    for (auto *ADDIUser : User->users())
3669      if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3670        return false;
3671  }
3672
3673  return true;
3674}
3675
// NOTE(review): the signature's first line (original 3676, presumably
// `bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,`) is missing
// from this extraction.
// Matches Addr as (Base + (Index << Scale)) for reg+reg-scaled addressing:
//  - (add (add (shl A C2) B) C1): reassociates the simm12 C1 into an ADDI
//    on B so the shl can become the scaled index (both shl positions tried);
//  - (add X (shl Y C)) / (add (shl Y C) X): shift on either operand;
//  - plain (add X Y): Scale = 0.
// Profitability is gated by isWorthFoldingIntoRegRegScale in every case.
3677                                              unsigned MaxShiftAmount,
3678                                              SDValue &Base, SDValue &Index,
3679                                              SDValue &Scale) {
3680  if (Addr.getOpcode() != ISD::ADD)
3681    return false;
3682  SDValue LHS = Addr.getOperand(0);
3683  SDValue RHS = Addr.getOperand(1);
3684
3685  EVT VT = Addr.getSimpleValueType();
3686  auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3687                                              SDValue &Shift) {
3688    if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3689      return false;
3690
3691    // Only match shifts by a value in range [0, MaxShiftAmount].
3692    unsigned ShiftAmt = N.getConstantOperandVal(1);
3693    if (ShiftAmt > MaxShiftAmount)
3694      return false;
3695
3696    Index = N.getOperand(0);
3697    Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3698    return true;
3699  };
3700
3701  if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3702    // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3703    if (LHS.getOpcode() == ISD::ADD &&
3704        !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3705        isInt<12>(C1->getSExtValue())) {
3706      if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3707          isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3708        SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3709                                                  SDLoc(Addr), VT);
3710        Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3711                                              LHS.getOperand(0), C1Val),
3712                       0);
3713        return true;
3714      }
3715
3716      // Add is commutative so we need to check both operands.
3717      if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3718          isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3719        SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3720                                                  SDLoc(Addr), VT);
3721        Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3722                                              LHS.getOperand(1), C1Val),
3723                       0);
3724        return true;
3725      }
3726    }
3727
3728    // Don't match add with constants.
3729    // FIXME: Is this profitable for large constants that have 0s in the lower
3730    // 12 bits that we can materialize with LUI?
3731    return false;
3732  }
3733
3734  // Try to match a shift on the RHS.
3735  if (SelectShl(RHS, Index, Scale)) {
3736    if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3737      return false;
3738    Base = LHS;
3739    return true;
3740  }
3741
3742  // Try to match a shift on the LHS.
3743  if (SelectShl(LHS, Index, Scale)) {
3744    if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3745      return false;
3746    Base = RHS;
3747    return true;
3748  }
3749
3750  if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3751    return false;
3752
3753  Base = LHS;
3754  Index = RHS;
3755  Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3756  return true;
3757}
3758
// Variant of SelectAddrRegRegScale that additionally requires the index
// register to be zero-extended from the low \p Bits bits. Only an explicit
// AND with a trailing-ones mask of exactly Bits bits is accepted; the mask is
// stripped so Index refers to the pre-masked value.
 3760                                             unsigned MaxShiftAmount,
 3761                                             unsigned Bits, SDValue &Base,
 3762                                             SDValue &Index,
 3763                                             SDValue &Scale) {
  // First run the ordinary reg+reg(scaled) matcher.
 3764   if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
 3765     return false;
 3766 
  // Then require the index to be (and X, (2^Bits)-1) and peel the mask off.
 3767   if (Index.getOpcode() == ISD::AND) {
 3768     auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
 3769     if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
 3770       Index = Index.getOperand(0);
 3771       return true;
 3772     }
 3773   }
 3774 
 3775   return false;
 3776 }
3777
// Match a plain (add Base, Offset) address for reg+reg addressing. A constant
// RHS is rejected so it can be folded into the memory op's immediate field
// instead of occupying a register.
 3779                                        SDValue &Offset) {
 3780   if (Addr.getOpcode() != ISD::ADD)
 3781     return false;
 3782 
 3783   if (isa<ConstantSDNode>(Addr.getOperand(1)))
 3784     return false;
 3785 
 3786   Base = Addr.getOperand(0);
 3787   Offset = Addr.getOperand(1);
 3788   return true;
 3789 }
3790
// Select the shift-amount operand for a variable shift. Since hardware shifts
// only read the low log2(ShiftWidth) bits of the amount, this peels off a
// ZERO_EXTEND, an AND mask that covers all needed bits, and ADD/SUB of
// constants that are congruent to 0 (or -1) modulo ShiftWidth. Always returns
// true; ShAmt holds the (possibly simplified) amount.
 3792                                      SDValue &ShAmt) {
 3793   ShAmt = N;
 3794 
 3795   // Peek through zext.
 3796   if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
 3797     ShAmt = ShAmt.getOperand(0);
 3798 
 3799   // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
 3800   // amount. If there is an AND on the shift amount, we can bypass it if it
 3801   // doesn't affect any of those bits.
 3802   if (ShAmt.getOpcode() == ISD::AND &&
 3803       isa<ConstantSDNode>(ShAmt.getOperand(1))) {
 3804     const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
 3805 
 3806     // Since the max shift amount is a power of 2 we can subtract 1 to make a
 3807     // mask that covers the bits needed to represent all shift amounts.
 3808     assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
 3809     APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
 3810 
 3811     if (ShMask.isSubsetOf(AndMask)) {
 3812       ShAmt = ShAmt.getOperand(0);
 3813     } else {
 3814       // SimplifyDemandedBits may have optimized the mask so try restoring any
 3815       // bits that are known zero.
 3816       KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
 3817       if (!ShMask.isSubsetOf(AndMask | Known.Zero))
 3818         return true;
 3819       ShAmt = ShAmt.getOperand(0);
 3820     }
 3821   }
 3822 
  // Fold away an ADD of a multiple of ShiftWidth: it cannot change the bits
  // the shifter reads.
 3823   if (ShAmt.getOpcode() == ISD::ADD &&
 3824       isa<ConstantSDNode>(ShAmt.getOperand(1))) {
 3825     uint64_t Imm = ShAmt.getConstantOperandVal(1);
 3826     // If we are shifting by X+N where N == 0 mod Size, then just shift by X
 3827     // to avoid the ADD.
 3828     if (Imm != 0 && Imm % ShiftWidth == 0) {
 3829       ShAmt = ShAmt.getOperand(0);
 3830       return true;
 3831     }
 3832   } else if (ShAmt.getOpcode() == ISD::SUB &&
 3833              isa<ConstantSDNode>(ShAmt.getOperand(0))) {
 3834     uint64_t Imm = ShAmt.getConstantOperandVal(0);
 3835     // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
 3836     // generate a NEG instead of a SUB of a constant.
 3837     if (Imm != 0 && Imm % ShiftWidth == 0) {
 3838       SDLoc DL(ShAmt);
 3839       EVT VT = ShAmt.getValueType();
 3840       SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
 3841       unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
 3842       MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
 3843                                                   ShAmt.getOperand(1));
 3844       ShAmt = SDValue(Neg, 0);
 3845       return true;
 3846     }
 3847     // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
 3848     // to generate a NOT instead of a SUB of a constant.
 3849     if (Imm % ShiftWidth == ShiftWidth - 1) {
 3850       SDLoc DL(ShAmt);
 3851       EVT VT = ShAmt.getValueType();
 3852       MachineSDNode *Not = CurDAG->getMachineNode(
 3853           RISCV::XORI, DL, VT, ShAmt.getOperand(1),
 3854           CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
 3855       ShAmt = SDValue(Not, 0);
 3856       return true;
 3857     }
 3858   }
 3859 
 3860   return true;
 3861 }
3862
 3863/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
 3864/// check for equality with 0. This function emits instructions that convert the
 3865/// seteq/setne into something that can be compared with 0.
 3866/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
 3867/// ISD::SETNE).
/// On success \p Val is a value that is zero iff the original comparison's
/// operands are equal (non-zero otherwise), suitable for a branch/compare
/// against x0.
 3869                                     SDValue &Val) {
 3870   assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
 3871          "Unexpected condition code!");
 3872 
 3873   // We're looking for a setcc.
 3874   if (N->getOpcode() != ISD::SETCC)
 3875     return false;
 3876 
 3877   // Must be an equality comparison.
 3878   ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
 3879   if (CCVal != ExpectedCCVal)
 3880     return false;
 3881 
 3882   SDValue LHS = N->getOperand(0);
 3883   SDValue RHS = N->getOperand(1);
 3884 
 3885   if (!LHS.getValueType().isScalarInteger())
 3886     return false;
 3887 
 3888   // If the RHS side is 0, we don't need any extra instructions, return the LHS.
 3889   if (isNullConstant(RHS)) {
 3890     Val = LHS;
 3891     return true;
 3892   }
 3893 
 3894   SDLoc DL(N);
 3895 
 3896   if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
 3897     int64_t CVal = C->getSExtValue();
 3898     // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
 3899     // non-zero otherwise.
 3900     if (CVal == -2048) {
 3901       Val = SDValue(
 3902           CurDAG->getMachineNode(
 3903               RISCV::XORI, DL, N->getValueType(0), LHS,
 3904               CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
 3905           0);
 3906       return true;
 3907     }
 3908     // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
 3909     // if the LHS is equal to the RHS and non-zero otherwise.
 3910     if (isInt<12>(CVal) || CVal == 2048) {
 3911       unsigned Opc = RISCV::ADDI;
      // If the LHS is already sign-extended from i32, an ADDIW both subtracts
      // and keeps the result properly sign-extended, letting us skip the
      // explicit sext.
 3912       if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
 3913           cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) {
 3914         Opc = RISCV::ADDIW;
 3915         LHS = LHS.getOperand(0);
 3916       }
 3917 
 3918       Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS,
 3919                                            CurDAG->getSignedTargetConstant(
 3920                                                -CVal, DL, N->getValueType(0))),
 3921                     0);
 3922       return true;
 3923     }
    // A power-of-2 RHS can be compared by flipping that single bit (BINVI,
    // Zbs): the result is zero iff LHS == RHS.
 3924     if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
 3925       Val = SDValue(
 3926           CurDAG->getMachineNode(
 3927               RISCV::BINVI, DL, N->getValueType(0), LHS,
 3928               CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
 3929           0);
 3930       return true;
 3931     }
 3932     // Same as the addi case above but for larger immediates (signed 26-bit) use
 3933     // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
 3934     // anything which can be done with a single lui as it might be compressible.
 3935     if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
 3936         (CVal & 0xFFF) != 0) {
 3937       Val = SDValue(
 3938           CurDAG->getMachineNode(
 3939               RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
 3940               CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
 3941           0);
 3942       return true;
 3943     }
 3944   }
 3945 
 3946   // If nothing else we can XOR the LHS and RHS to produce zero if they are
 3947   // equal and a non-zero value if they aren't.
 3948   Val = SDValue(
 3949       CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
 3950   return true;
 3951 }
3952
// Match a value that is sign-extended from its low \p Bits bits. Accepts an
// explicit sign_extend_inreg of exactly Bits bits, or any value
// ComputeNumSignBits proves has enough sign bits; in the latter case a
// matching (sra (shl X, C), C) pair is also unwrapped to the inner X.
 3954   if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
 3955       cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
 3956     Val = N.getOperand(0);
 3957     return true;
 3958   }
 3959 
  // Strip an (sra (shl X, ShiftAmt), ShiftAmt) sign-extension idiom when both
  // shift amounts are exactly ShiftAmt.
 3960   auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
 3961     if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
 3962       return N;
 3963 
 3964     SDValue N0 = N.getOperand(0);
 3965     if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
 3966         N.getConstantOperandVal(1) == ShiftAmt &&
 3967         N0.getConstantOperandVal(1) == ShiftAmt)
 3968       return N0.getOperand(0);
 3969 
 3970     return N;
 3971   };
 3972 
 3973   MVT VT = N.getSimpleValueType();
 3974   if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
 3975     Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
 3976     return true;
 3977   }
 3978 
 3979   return false;
 3980 }
3981
// Match a value that is zero-extended from its low \p Bits bits: either an
// explicit AND with a trailing-ones mask of Bits bits (which is stripped), or
// any value whose upper bits are provably zero.
 3983   if (N.getOpcode() == ISD::AND) {
 3984     auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
 3985     if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
 3986       Val = N.getOperand(0);
 3987       return true;
 3988     }
 3989   }
 3990   MVT VT = N.getSimpleValueType();
 3991   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
 3992   if (CurDAG->MaskedValueIsZero(N, Mask)) {
 3993     Val = N;
 3994     return true;
 3995   }
 3996 
 3997   return false;
 3998 }
3999
 4000/// Look for various patterns that can be done with a SHL that can be folded
 4001/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
 4002/// SHXADD we are trying to match.
/// On success \p Val is a freshly created shift machine node whose result,
/// shifted left by ShAmt inside SHXADD, reproduces the original value.
 4004                                        SDValue &Val) {
  // Case 1: (and (shl/srl/sra y, c2), c1) — reassociate the masked shift into
  // a single SRLI/SRLIW/SRAI feeding the SHXADD.
 4005   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
 4006     SDValue N0 = N.getOperand(0);
 4007 
 4008     if (bool LeftShift = N0.getOpcode() == ISD::SHL;
 4009         (LeftShift || N0.getOpcode() == ISD::SRL) &&
 4011       uint64_t Mask = N.getConstantOperandVal(1);
 4012       unsigned C2 = N0.getConstantOperandVal(1);
 4013 
      // Normalize the mask to the bits the shift can actually produce.
 4014       unsigned XLen = Subtarget->getXLen();
 4015       if (LeftShift)
 4016         Mask &= maskTrailingZeros<uint64_t>(C2);
 4017       else
 4018         Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
 4019 
 4020       if (isShiftedMask_64(Mask)) {
 4021         unsigned Leading = XLen - llvm::bit_width(Mask);
 4022         unsigned Trailing = llvm::countr_zero(Mask);
        // The mask's trailing zero count must equal the SHXADD scale.
 4023         if (Trailing != ShAmt)
 4024           return false;
 4025 
 4026         unsigned Opcode;
 4027         // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
 4028         // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
 4029         // followed by a SHXADD with c3 for the X amount.
 4030         if (LeftShift && Leading == 0 && C2 < Trailing)
 4031           Opcode = RISCV::SRLI;
 4032         // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
 4033         // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
 4034         // followed by a SHXADD with c3 for the X amount.
 4035         else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
 4036           Opcode = RISCV::SRLIW;
 4037         // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
 4038         // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
 4039         // followed by a SHXADD using c3 for the X amount.
 4040         else if (!LeftShift && Leading == C2)
 4041           Opcode = RISCV::SRLI;
 4042         // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
 4043         // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
 4044         // followed by a SHXADD using c3 for the X amount.
 4045         else if (!LeftShift && Leading == 32 + C2)
 4046           Opcode = RISCV::SRLIW;
 4047         else
 4048           return false;
 4049 
 4050         SDLoc DL(N);
 4051         EVT VT = N.getValueType();
 4052         ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
 4053         Val = SDValue(
 4054             CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
 4055                                    CurDAG->getTargetConstant(ShAmt, DL, VT)),
 4056             0);
 4057         return true;
 4058       }
 4059     } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
 4061       uint64_t Mask = N.getConstantOperandVal(1);
 4062       unsigned C2 = N0.getConstantOperandVal(1);
 4063 
 4064       // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
 4065       // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
 4066       // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
 4067       // the X amount.
 4068       if (isShiftedMask_64(Mask)) {
 4069         unsigned XLen = Subtarget->getXLen();
 4070         unsigned Leading = XLen - llvm::bit_width(Mask);
 4071         unsigned Trailing = llvm::countr_zero(Mask);
 4072         if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
 4073           SDLoc DL(N);
 4074           EVT VT = N.getValueType();
 4075           Val = SDValue(CurDAG->getMachineNode(
 4076                             RISCV::SRAI, DL, VT, N0.getOperand(0),
 4077                             CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
 4078                         0);
 4079           Val = SDValue(CurDAG->getMachineNode(
 4080                             RISCV::SRLI, DL, VT, Val,
 4081                             CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
 4082                         0);
 4083           return true;
 4084         }
 4085       }
 4086     }
  // Case 2: (shl/srl (and X, Mask), C1) — the mask is inside the shift.
 4087   } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
 4088              (LeftShift || N.getOpcode() == ISD::SRL) &&
 4089              isa<ConstantSDNode>(N.getOperand(1))) {
 4090     SDValue N0 = N.getOperand(0);
 4091     if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
 4093       uint64_t Mask = N0.getConstantOperandVal(1);
 4094       if (isShiftedMask_64(Mask)) {
 4095         unsigned C1 = N.getConstantOperandVal(1);
 4096         unsigned XLen = Subtarget->getXLen();
 4097         unsigned Leading = XLen - llvm::bit_width(Mask);
 4098         unsigned Trailing = llvm::countr_zero(Mask);
 4099         // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
 4100         // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
 4101         if (LeftShift && Leading == 32 && Trailing > 0 &&
 4102             (Trailing + C1) == ShAmt) {
 4103           SDLoc DL(N);
 4104           EVT VT = N.getValueType();
 4105           Val = SDValue(CurDAG->getMachineNode(
 4106                             RISCV::SRLIW, DL, VT, N0.getOperand(0),
 4107                             CurDAG->getTargetConstant(Trailing, DL, VT)),
 4108                         0);
 4109           return true;
 4110         }
 4111         // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
 4112         // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
 4113         if (!LeftShift && Leading == 32 && Trailing > C1 &&
 4114             (Trailing - C1) == ShAmt) {
 4115           SDLoc DL(N);
 4116           EVT VT = N.getValueType();
 4117           Val = SDValue(CurDAG->getMachineNode(
 4118                             RISCV::SRLIW, DL, VT, N0.getOperand(0),
 4119                             CurDAG->getTargetConstant(Trailing, DL, VT)),
 4120                         0);
 4121           return true;
 4122         }
 4123       }
 4124     }
 4125   }
 4126 
 4127   return false;
 4128 }
4129
 4130/// Look for various patterns that can be done with a SHL that can be folded
 4131/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
 4132/// SHXADD_UW we are trying to match.
/// On success \p Val is an SLLI machine node whose result, zero-extended from
/// 32 bits and shifted left by ShAmt inside SHXADD_UW, reproduces N.
 4134                                           SDValue &Val) {
 4135   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
 4136       N.hasOneUse()) {
 4137     SDValue N0 = N.getOperand(0);
 4138     if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
 4139         N0.hasOneUse()) {
 4140       uint64_t Mask = N.getConstantOperandVal(1);
 4141       unsigned C2 = N0.getConstantOperandVal(1);
 4142 
      // Drop mask bits the left shift already zeroed.
 4143       Mask &= maskTrailingZeros<uint64_t>(C2);
 4144 
 4145       // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
 4146       // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
 4147       // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
 4148       if (isShiftedMask_64(Mask)) {
 4149         unsigned Leading = llvm::countl_zero(Mask);
 4150         unsigned Trailing = llvm::countr_zero(Mask);
 4151         if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
 4152           SDLoc DL(N);
 4153           EVT VT = N.getValueType();
 4154           Val = SDValue(CurDAG->getMachineNode(
 4155                             RISCV::SLLI, DL, VT, N0.getOperand(0),
 4156                             CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
 4157                         0);
 4158           return true;
 4159         }
 4160       }
 4161     }
 4162   }
 4163 
 4164   return false;
 4165 }
4166
// Return true if this OR (scalar or OR_VL) has provably disjoint operands —
// either the combiner already set the disjoint flag, or value tracking shows
// the operands share no set bits. A disjoint OR is equivalent to an ADD.
 4168   assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
 4169   if (N->getFlags().hasDisjoint())
 4170     return true;
 4171   return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
 4172 }
4173
4174bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
4175 SDValue N, SDValue &Val) {
4176 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
4177 /*CompressionCost=*/true);
4178 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
4179 /*CompressionCost=*/true);
4180 if (OrigCost <= Cost)
4181 return false;
4182
4183 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
4184 return true;
4185}
4186
// For a constant whose bits [63:31] are exactly 0b0..01 (i.e. a value in
// [0x80000000, 0xffffffff]), try materializing the sign-extended 64-bit value
// instead when that is cheaper. This is only legal when every user is an ADD
// or a disjoint OR (which acts like an ADD), so the matching pattern can
// compensate for the changed upper bits.
 4188   if (!isa<ConstantSDNode>(N))
 4189     return false;
 4190   int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
  // Require bits [63:31] == 1, i.e. Imm in [2^31, 2^32).
 4191   if ((Imm >> 31) != 1)
 4192     return false;
 4193 
 4194   for (const SDNode *U : N->users()) {
 4195     switch (U->getOpcode()) {
 4196     case ISD::ADD:
 4197       break;
 4198     case ISD::OR:
 4199       if (orDisjoint(U))
 4200         break;
 4201       return false;
 4202     default:
 4203       return false;
 4204     }
 4205   }
 4206 
 4207   return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
 4208 }
4209
// For a 64-bit-only constant, try materializing its negation when that is
// cheaper; the users (ADDs, possibly through a VMV_V_X_VL splat feeding
// vector ADDs) can then use a subtract-style pattern instead.
 4211   if (!isa<ConstantSDNode>(N))
 4212     return false;
 4213   int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
  // 32-bit constants are already cheap; INT64_MIN has no negation.
 4214   if (isInt<32>(Imm))
 4215     return false;
 4216   if (Imm == INT64_MIN)
 4217     return false;
 4218 
  // Every user must be able to absorb the negation.
 4219   for (const SDNode *U : N->users()) {
 4220     switch (U->getOpcode()) {
 4221     case ISD::ADD:
 4222       break;
 4223     case RISCVISD::VMV_V_X_VL:
 4224       if (!all_of(U->users(), [](const SDNode *V) {
 4225             return V->getOpcode() == ISD::ADD ||
 4226                    V->getOpcode() == RISCVISD::ADD_VL;
 4227           }))
 4228         return false;
 4229       break;
 4230     default:
 4231       return false;
 4232     }
 4233   }
 4234 
 4235   return selectImm64IfCheaper(-Imm, Imm, N, Val);
 4236 }
4237
// Try materializing the bitwise inverse of a constant used only by AND/OR/XOR
// (or VMV_V_X_VL splats feeding vector ANDs). The users can then select the
// inverted-operand forms (ANDN/ORN/XNOR from Zbb/Zbkb, vandn from Zvkb).
 4239   if (!isa<ConstantSDNode>(N))
 4240     return false;
 4241   int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
 4242 
 4243   // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
 4244   if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
 4245     return false;
 4246 
 4247   // Abandon this transform if the constant is needed elsewhere.
 4248   for (const SDNode *U : N->users()) {
 4249     switch (U->getOpcode()) {
 4250     case ISD::AND:
 4251     case ISD::OR:
 4252     case ISD::XOR:
      // The scalar inverted-operand instructions require Zbb or Zbkb.
 4253       if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
 4254         return false;
 4255       break;
 4256     case RISCVISD::VMV_V_X_VL:
 4257       if (!Subtarget->hasStdExtZvkb())
 4258         return false;
 4259       if (!all_of(U->users(), [](const SDNode *V) {
 4260             return V->getOpcode() == ISD::AND ||
 4261                    V->getOpcode() == RISCVISD::AND_VL;
 4262           }))
 4263         return false;
 4264       break;
 4265     default:
 4266       return false;
 4267     }
 4268   }
 4269 
 4270   if (isInt<32>(Imm)) {
 4271     Val =
 4272         selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
 4273     return true;
 4274   }
 4275 
 4276   // For 64-bit constants, the instruction sequences get complex,
 4277   // so we select inverted only if it's cheaper.
 4278   return selectImm64IfCheaper(~Imm, Imm, N, Val);
 4279 }
4280
4281static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
4282 unsigned Bits,
4283 const TargetInstrInfo *TII) {
4284 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
4285
4286 if (!MCOpcode)
4287 return false;
4288
4289 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
4290 const uint64_t TSFlags = MCID.TSFlags;
4291 if (!RISCVII::hasSEWOp(TSFlags))
4292 return false;
4293 assert(RISCVII::hasVLOp(TSFlags));
4294
4295 unsigned ChainOpIdx = User->getNumOperands() - 1;
4296 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
4297 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
4298 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
4299 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
4300
4301 if (UserOpNo == VLIdx)
4302 return false;
4303
4304 auto NumDemandedBits =
4305 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
4306 return NumDemandedBits && Bits >= *NumDemandedBits;
4307}
4308
 4309// Return true if all users of this SDNode* only consume the lower \p Bits.
 4310// This can be used to form W instructions for add/sub/mul/shl even when the
 4311// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
 4312// SimplifyDemandedBits has made it so some users see a sext_inreg and some
 4313// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
 4314// the add/sub/mul/shl to become non-W instructions. By checking the users we
 4315// may be able to use a W instruction and CSE with the other instruction if
 4316// this has happened. We could try to detect that the CSE opportunity exists
 4317// before doing this, but that would be more complicated.
 4319                                         const unsigned Depth) const {
 4320   assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
 4321           Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
 4322           Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
 4323           Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
 4324           Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
 4325           isa<ConstantSDNode>(Node) || Depth != 0) &&
 4326          "Unexpected opcode");
 4327 
 4329     return false;
 4330 
 4331   // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
 4332   // the VT. Ensure the type is scalar to avoid wasting time on vectors.
 4333   if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
 4334     return false;
 4335 
  // Every user must demand no more than the low Bits bits; any user we cannot
  // classify forces a conservative false.
 4336   for (SDUse &Use : Node->uses()) {
 4337     SDNode *User = Use.getUser();
 4338     // Users of this node should have already been instruction selected
 4339     if (!User->isMachineOpcode())
 4340       return false;
 4341 
 4342     // TODO: Add more opcodes?
 4343     switch (User->getMachineOpcode()) {
 4344     default:
 4346       break;
 4347       return false;
    // W-form and 32-bit-input instructions only read the low 32 bits.
 4348     case RISCV::ADDW:
 4349     case RISCV::ADDIW:
 4350     case RISCV::SUBW:
 4351     case RISCV::MULW:
 4352     case RISCV::SLLW:
 4353     case RISCV::SLLIW:
 4354     case RISCV::SRAW:
 4355     case RISCV::SRAIW:
 4356     case RISCV::SRLW:
 4357     case RISCV::SRLIW:
 4358     case RISCV::DIVW:
 4359     case RISCV::DIVUW:
 4360     case RISCV::REMW:
 4361     case RISCV::REMUW:
 4362     case RISCV::ROLW:
 4363     case RISCV::RORW:
 4364     case RISCV::RORIW:
 4365     case RISCV::CLSW:
 4366     case RISCV::CLZW:
 4367     case RISCV::CTZW:
 4368     case RISCV::CPOPW:
 4369     case RISCV::SLLI_UW:
 4370     case RISCV::ABSW:
 4371     case RISCV::FMV_W_X:
 4372     case RISCV::FCVT_H_W:
 4373     case RISCV::FCVT_H_W_INX:
 4374     case RISCV::FCVT_H_WU:
 4375     case RISCV::FCVT_H_WU_INX:
 4376     case RISCV::FCVT_S_W:
 4377     case RISCV::FCVT_S_W_INX:
 4378     case RISCV::FCVT_S_WU:
 4379     case RISCV::FCVT_S_WU_INX:
 4380     case RISCV::FCVT_D_W:
 4381     case RISCV::FCVT_D_W_INX:
 4382     case RISCV::FCVT_D_WU:
 4383     case RISCV::FCVT_D_WU_INX:
 4384     case RISCV::TH_REVW:
 4385     case RISCV::TH_SRRIW:
 4386       if (Bits >= 32)
 4387         break;
 4388       return false;
 4389     case RISCV::SLL:
 4390     case RISCV::SRA:
 4391     case RISCV::SRL:
 4392     case RISCV::ROL:
 4393     case RISCV::ROR:
 4394     case RISCV::BSET:
 4395     case RISCV::BCLR:
 4396     case RISCV::BINV:
 4397       // Shift amount operands only use log2(Xlen) bits.
 4398       if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
 4399         break;
 4400       return false;
 4401     case RISCV::SLLI:
 4402       // SLLI only uses the lower (XLen - ShAmt) bits.
 4403       if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
 4404         break;
 4405       return false;
 4406     case RISCV::ANDI:
      // ANDI demands only up to the mask's highest set bit; otherwise fall
      // into the recursive user check.
 4407       if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
 4408         break;
 4409       goto RecCheck;
 4410     case RISCV::ORI: {
      // ORI forces high bits to one up to the mask's highest clear bit.
 4411       uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
 4412       if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
 4413         break;
 4414       [[fallthrough]];
 4415     }
    // Bitwise ops and SHXADD preserve demanded bits; recurse into their users.
 4416     case RISCV::AND:
 4417     case RISCV::OR:
 4418     case RISCV::XOR:
 4419     case RISCV::XORI:
 4420     case RISCV::ANDN:
 4421     case RISCV::ORN:
 4422     case RISCV::XNOR:
 4423     case RISCV::SH1ADD:
 4424     case RISCV::SH2ADD:
 4425     case RISCV::SH3ADD:
 4426     RecCheck:
 4427       if (hasAllNBitUsers(User, Bits, Depth + 1))
 4428         break;
 4429       return false;
 4430     case RISCV::SRLI: {
 4431       unsigned ShAmt = User->getConstantOperandVal(1);
 4432       // If we are shifting right by less than Bits, and users don't demand any
 4433       // bits that were shifted into [Bits-1:0], then we can consider this as an
 4434       // N-Bit user.
 4435       if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
 4436         break;
 4437       return false;
 4438     }
 4439     case RISCV::SEXT_B:
 4440     case RISCV::PACKH:
 4441       if (Bits >= 8)
 4442         break;
 4443       return false;
 4444     case RISCV::SEXT_H:
 4445     case RISCV::FMV_H_X:
 4446     case RISCV::ZEXT_H_RV32:
 4447     case RISCV::ZEXT_H_RV64:
 4448     case RISCV::PACKW:
 4449       if (Bits >= 16)
 4450         break;
 4451       return false;
 4452     case RISCV::PACK:
 4453       if (Bits >= (Subtarget->getXLen() / 2))
 4454         break;
 4455       return false;
 4456     case RISCV::PPAIRE_H:
 4457       // If only the lower 32-bits of the result are used, then only the
 4458       // lower 16 bits of the inputs are used.
 4459       if (Bits >= 16 && hasAllNBitUsers(User, 32, Depth + 1))
 4460         break;
 4461       return false;
 4462     case RISCV::ADD_UW:
 4463     case RISCV::SH1ADD_UW:
 4464     case RISCV::SH2ADD_UW:
 4465     case RISCV::SH3ADD_UW:
 4466       // The first operand to add.uw/shXadd.uw is implicitly zero extended from
 4467       // 32 bits.
 4468       if (Use.getOperandNo() == 0 && Bits >= 32)
 4469         break;
 4470       return false;
    // Stores only read as many bits of the value operand as they write.
 4471     case RISCV::SB:
 4472       if (Use.getOperandNo() == 0 && Bits >= 8)
 4473         break;
 4474       return false;
 4475     case RISCV::SH:
 4476       if (Use.getOperandNo() == 0 && Bits >= 16)
 4477         break;
 4478       return false;
 4479     case RISCV::SW:
 4480       if (Use.getOperandNo() == 0 && Bits >= 32)
 4481         break;
 4482       return false;
 4483     case RISCV::TH_EXT:
 4484     case RISCV::TH_EXTU: {
 4485       unsigned Msb = User->getConstantOperandVal(1);
 4486       unsigned Lsb = User->getConstantOperandVal(2);
 4487       // Behavior of Msb < Lsb is not well documented.
 4488       if (Msb >= Lsb && Bits > Msb)
 4489         break;
 4490       return false;
 4491     }
 4492     }
 4493   }
 4494 
 4495   return true;
 4496 }
4497
 4498// Select a constant that can be represented as (sign_extend(imm5) << imm2).
// On success \p Simm5 is the signed 5-bit payload and \p Shl2 the shift in
// [0, 3]; the smallest shift that fits is chosen.
 4500                                         SDValue &Shl2) {
 4501   auto *C = dyn_cast<ConstantSDNode>(N);
 4502   if (!C)
 4503     return false;
 4504 
 4505   int64_t Offset = C->getSExtValue();
 4506   for (unsigned Shift = 0; Shift < 4; Shift++) {
    // Shift must divide the value exactly and the quotient must fit simm5.
 4507     if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
 4508       EVT VT = N->getValueType(0);
 4509       Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
 4510       Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
 4511       return true;
 4512     }
 4513   }
 4514 
 4515   return false;
 4516 }
4517
 4518// Select VL as a 5 bit immediate or a value that will become a register. This
 4519// allows us to choose between VSETIVLI or VSETVLI later.
// Always succeeds; \p VL is either a target constant (uimm5 or the VLMax
// sentinel) or the original value.
 4521   auto *C = dyn_cast<ConstantSDNode>(N);
 4522   if (C && isUInt<5>(C->getZExtValue())) {
 4523     VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
 4524                                    N->getValueType(0));
 4525   } else if (C && C->isAllOnes()) {
 4526     // Treat all ones as VLMax.
 4527     VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
 4528                                          N->getValueType(0));
 4529   } else if (isa<RegisterSDNode>(N) &&
 4530              cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
 4531     // All our VL operands use an operand that allows GPRNoX0 or an immediate
 4532     // as the register class. Convert X0 to a special immediate to pass the
 4533     // MachineVerifier. This is recognized specially by the vsetvli insertion
 4534     // pass.
 4535     VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
 4536                                          N->getValueType(0));
 4537   } else {
 4538     VL = N;
 4539   }
 4540 
 4541   return true;
 4542 }
4543
// Peel an undef-passthru INSERT_SUBVECTOR and return the underlying
// VMV_V_X_VL / VMV_S_X_VL splat node (with undef passthru), or an empty
// SDValue if N is not such a splat.
 4545   if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
 4546     if (!N.getOperand(0).isUndef())
 4547       return SDValue();
 4548     N = N.getOperand(1);
 4549   }
 4550   SDValue Splat = N;
 4551   if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
 4552        Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
 4553       !Splat.getOperand(0).isUndef())
 4554     return SDValue();
  // Operands are (passthru, scalar, VL).
 4555   assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
 4556   return Splat;
 4557 }
4558
// Match any scalar splat (via findVSplat) and return its scalar operand.
 4561   if (!Splat)
 4562     return false;
 4563 
 4564   SplatVal = Splat.getOperand(1);
 4565   return true;
 4566 }
4567
// Shared helper for the selectVSplat*Imm* matchers: match a constant splat
// whose (element-width-truncated, sign-extended) value passes \p ValidateImm,
// optionally subtracting one (for the "Plus1" selectors) before emitting the
// target constant in XLenVT.
 4569                                   SelectionDAG &DAG,
 4570                                   const RISCVSubtarget &Subtarget,
 4571                                   std::function<bool(int64_t)> ValidateImm,
 4572                                   bool Decrement = false) {
 4574   if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
 4575     return false;
 4576 
 4577   const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
 4578   assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
 4579          "Unexpected splat operand type");
 4580 
 4581   // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
 4582   // type is wider than the resulting vector element type: an implicit
 4583   // truncation first takes place. Therefore, perform a manual
 4584   // truncation/sign-extension in order to ignore any truncated bits and catch
 4585   // any zero-extended immediate.
 4586   // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
 4587   // sign-extending to (XLenVT -1).
 4588   APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
 4589 
 4590   int64_t SplatImm = SplatConst.getSExtValue();
 4591 
 4592   if (!ValidateImm(SplatImm))
 4593     return false;
 4594 
  // "Plus1" matchers validate the original value but emit value-1.
 4595   if (Decrement)
 4596     SplatImm -= 1;
 4597 
 4598   SplatVal =
 4599       DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
 4600   return true;
 4601 }
4602
// Match a constant splat that fits a signed 5-bit immediate.
 4604   return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
 4605                                [](int64_t Imm) { return isInt<5>(Imm); });
 4606 }
4607
// Match a constant splat in [-15, 16] and return the value minus one, so it
// fits simm5 after the instruction re-adds one.
 4609   return selectVSplatImmHelper(
 4610       N, SplatVal, *CurDAG, *Subtarget,
 4611       [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
 4612       /*Decrement=*/true);
 4613 }
4614
// Match a constant splat in [-15, 16] without the minus-one adjustment.
 4616   return selectVSplatImmHelper(
 4617       N, SplatVal, *CurDAG, *Subtarget,
 4618       [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
 4619       /*Decrement=*/false);
 4620 }
4621
// Match a non-zero constant splat in [-15, 16] and return the value minus
// one.
 4623                                                 SDValue &SplatVal) {
 4624   return selectVSplatImmHelper(
 4625       N, SplatVal, *CurDAG, *Subtarget,
 4626       [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
 4627       /*Decrement=*/true);
 4628 }
4629
// Match a constant splat that fits an unsigned immediate of \p Bits bits.
 4631                                     SDValue &SplatVal) {
 4632   return selectVSplatImmHelper(
 4633       N, SplatVal, *CurDAG, *Subtarget,
 4634       [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
 4635 }
4636
// Match a constant splat whose scalar qualifies for the negated-immediate
// optimization (see selectNegImm).
 4639   return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
 4640 }
4641
// Match a scalar splat reached through any chain of single-use
// extends/truncates with element size >= 8 bits, and return the splatted
// scalar.
 4643   auto IsExtOrTrunc = [](SDValue N) {
 4644     switch (N->getOpcode()) {
 4645     case ISD::SIGN_EXTEND:
 4646     case ISD::ZERO_EXTEND:
 4647     // There's no passthru on these _VL nodes so any VL/mask is ok, since any
 4648     // inactive elements will be undef.
 4649     case RISCVISD::TRUNCATE_VECTOR_VL:
 4650     case RISCVISD::VSEXT_VL:
 4651     case RISCVISD::VZEXT_VL:
 4652       return true;
 4653     default:
 4654       return false;
 4655     }
 4656   };
 4657 
 4658   // We can have multiple nested nodes, so unravel them all if needed.
 4659   while (IsExtOrTrunc(N)) {
 4660     if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
 4661       return false;
 4662     N = N->getOperand(0);
 4663   }
 4664 
 4665   return selectVSplat(N, SplatVal);
 4666 }
4667
// Match an FP value that can be provided as a scalar integer register:
// a bitcast/move from XLenVT, or an FP constant materializable via the
// integer constant path (excluding +0.0, which the patterns handle directly).
 4670   // Allow bitcasts from XLenVT -> FP.
 4671   if (N.getOpcode() == ISD::BITCAST &&
 4672       N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
 4673     Imm = N.getOperand(0);
 4674     return true;
 4675   }
 4676   // Allow moves from XLenVT to FP.
 4677   if (N.getOpcode() == RISCVISD::FMV_H_X ||
 4678       N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
 4679     Imm = N.getOperand(0);
 4680     return true;
 4681   }
 4682 
 4683   // Otherwise, look for FP constants that can materialized with scalar int.
 4685   if (!CFP)
 4686     return false;
 4687   const APFloat &APF = CFP->getValueAPF();
 4688   // td can handle +0.0 already.
 4689   if (APF.isPosZero())
 4690     return false;
 4691 
 4692   MVT VT = CFP->getSimpleValueType(0);
 4693 
 4694   MVT XLenVT = Subtarget->getXLenVT();
  // An f64 constant's 64 bits do not fit a single RV32 register.
 4695   if (VT == MVT::f64 && !Subtarget->is64Bit()) {
 4696     assert(APF.isNegZero() && "Unexpected constant.");
 4697     return false;
 4698   }
 4699   SDLoc DL(N);
 4700   Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
 4701                   *Subtarget);
 4702   return true;
 4703 }
4703
4705 SDValue &Imm) {
4706 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4707 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4708
4709 if (!isInt<5>(ImmVal))
4710 return false;
4711
4712 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4713 Subtarget->getXLenVT());
4714 return true;
4715 }
4716
4717 return false;
4718}
4719
 4720// Match XOR with a VMSET_VL operand. Return the other operand.
// XOR with an all-ones mask is a mask NOT; checks both operand positions
// since XOR is commutative.
 4722   if (N.getOpcode() != ISD::XOR)
 4723     return false;
 4724 
 4725   if (N.getOperand(0).getOpcode() == RISCVISD::VMSET_VL) {
 4726     Res = N.getOperand(1);
 4727     return true;
 4728   }
 4729 
 4730   if (N.getOperand(1).getOpcode() == RISCVISD::VMSET_VL) {
 4731     Res = N.getOperand(0);
 4732     return true;
 4733   }
 4734 
 4735   return false;
 4736 }
4737
 4738// Match VMXOR_VL with a VMSET_VL operand. Making sure that that VL operand
 4739// matches the parent's VL. Return the other operand of the VMXOR_VL.
 4741                                        SDValue &Res) {
 4742   if (N.getOpcode() != RISCVISD::VMXOR_VL)
 4743     return false;
 4744 
  // Only mask-logic parents are expected; their VL is operand 2.
 4745   assert(Parent &&
 4746          (Parent->getOpcode() == RISCVISD::VMAND_VL ||
 4747           Parent->getOpcode() == RISCVISD::VMOR_VL ||
 4748           Parent->getOpcode() == RISCVISD::VMXOR_VL) &&
 4749          "Unexpected parent");
 4750 
 4751   // The VL should match the parent.
 4752   if (Parent->getOperand(2) != N->getOperand(2))
 4753     return false;
 4754 
 4755   if (N.getOperand(0).getOpcode() == RISCVISD::VMSET_VL) {
 4756     Res = N.getOperand(1);
 4757     return true;
 4758   }
 4759 
 4760   if (N.getOperand(1).getOpcode() == RISCVISD::VMSET_VL) {
 4761     Res = N.getOperand(0);
 4762     return true;
 4763   }
 4764 
 4765   return false;
 4766 }
4767
4768// Try to remove sext.w if the input is a W instruction or can be made into
4769// a W instruction cheaply.
4770bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4771 // Look for the sext.w pattern, addiw rd, rs1, 0.
4772 if (N->getMachineOpcode() != RISCV::ADDIW ||
4773 !isNullConstant(N->getOperand(1)))
4774 return false;
4775
4776 SDValue N0 = N->getOperand(0);
4777 if (!N0.isMachineOpcode())
4778 return false;
4779
4780 switch (N0.getMachineOpcode()) {
4781 default:
4782 break;
4783 case RISCV::ADD:
4784 case RISCV::ADDI:
4785 case RISCV::SUB:
4786 case RISCV::MUL:
4787 case RISCV::SLLI: {
4788 // Convert sext.w+add/sub/mul to their W instructions. This will create
4789 // a new independent instruction. This improves latency.
4790 unsigned Opc;
4791 switch (N0.getMachineOpcode()) {
4792 default:
4793 llvm_unreachable("Unexpected opcode!");
4794 case RISCV::ADD: Opc = RISCV::ADDW; break;
4795 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4796 case RISCV::SUB: Opc = RISCV::SUBW; break;
4797 case RISCV::MUL: Opc = RISCV::MULW; break;
4798 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4799 }
4800
4801 SDValue N00 = N0.getOperand(0);
4802 SDValue N01 = N0.getOperand(1);
4803
4804 // Shift amount needs to be uimm5.
4805 if (N0.getMachineOpcode() == RISCV::SLLI &&
4806 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4807 break;
4808
4809 SDNode *Result =
4810 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4811 N00, N01);
4812 ReplaceUses(N, Result);
4813 return true;
4814 }
4815 case RISCV::ADDW:
4816 case RISCV::ADDIW:
4817 case RISCV::SUBW:
4818 case RISCV::MULW:
4819 case RISCV::SLLIW:
4820 case RISCV::PACKW:
4821 case RISCV::TH_MULAW:
4822 case RISCV::TH_MULAH:
4823 case RISCV::TH_MULSW:
4824 case RISCV::TH_MULSH:
4825 if (N0.getValueType() == MVT::i32)
4826 break;
4827
4828 // Result is already sign extended just remove the sext.w.
4829 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4830 ReplaceUses(N, N0.getNode());
4831 return true;
4832 }
4833
4834 return false;
4835}
4836
4837static bool usesAllOnesMask(SDValue MaskOp) {
4838 const auto IsVMSet = [](unsigned Opc) {
4839 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4840 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4841 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4842 Opc == RISCV::PseudoVMSET_M_B8;
4843 };
4844
4845 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4846 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4847 // assume that it's all-ones? Same applies to its VL.
4848 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4849}
4850
4851static bool isImplicitDef(SDValue V) {
4852 if (!V.isMachineOpcode())
4853 return false;
4854 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4855 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4856 if (!isImplicitDef(V.getOperand(I)))
4857 return false;
4858 return true;
4859 }
4860 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4861}
4862
4863// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4864// corresponding "unmasked" pseudo versions.
4865bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4866 const RISCV::RISCVMaskedPseudoInfo *I =
4867 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4868 if (!I)
4869 return false;
4870
4871 unsigned MaskOpIdx = I->MaskOpIdx;
4872 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4873 return false;
4874
4875 // There are two classes of pseudos in the table - compares and
4876 // everything else. See the comment on RISCVMaskedPseudo for details.
4877 const unsigned Opc = I->UnmaskedPseudo;
4878 const MCInstrDesc &MCID = TII->get(Opc);
4879 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4880
4881 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4882 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4883
4884 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4886 "Unmasked pseudo has policy but masked pseudo doesn't?");
4887 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4888 "Unexpected pseudo structure");
4889 assert(!(HasPassthru && !MaskedHasPassthru) &&
4890 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4891
4893 // Skip the passthru operand at index 0 if the unmasked don't have one.
4894 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4895 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4896 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
4897 bool HasChainOp =
4898 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
4899 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4900 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4901 // Skip the mask
4902 SDValue Op = N->getOperand(I);
4903 if (I == MaskOpIdx)
4904 continue;
4905 if (DropPolicy && I == LastOpNum)
4906 continue;
4907 Ops.push_back(Op);
4908 }
4909
4910 MachineSDNode *Result =
4911 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4912
4913 if (!N->memoperands_empty())
4914 CurDAG->setNodeMemRefs(Result, N->memoperands());
4915
4916 Result->setFlags(N->getFlags());
4917 ReplaceUses(N, Result);
4918
4919 return true;
4920}
4921
4922/// If our passthru is an implicit_def, use noreg instead. This side
4923/// steps issues with MachineCSE not being able to CSE expressions with
4924/// IMPLICIT_DEF operands while preserving the semantic intent. See
4925/// pr64282 for context. Note that this transform is the last one
4926/// performed at ISEL DAG to DAG.
4927bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4928 bool MadeChange = false;
4929 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4930
4931 while (Position != CurDAG->allnodes_begin()) {
4932 SDNode *N = &*--Position;
4933 if (N->use_empty() || !N->isMachineOpcode())
4934 continue;
4935
4936 const unsigned Opc = N->getMachineOpcode();
4937 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4939 !isImplicitDef(N->getOperand(0)))
4940 continue;
4941
4943 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4944 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4945 SDValue Op = N->getOperand(I);
4946 Ops.push_back(Op);
4947 }
4948
4949 MachineSDNode *Result =
4950 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4951 Result->setFlags(N->getFlags());
4952 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4953 ReplaceUses(N, Result);
4954 MadeChange = true;
4955 }
4956 return MadeChange;
4957}
4958
4959
4960// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4961// for instruction scheduling.
4963 CodeGenOptLevel OptLevel) {
4964 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4965}
4966
4968
4973
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool getVal(MDTuple *MD, const char *Key, uint64_t &Val)
static bool usesAllOnesMask(SDValue MaskOp)
static Register getTileReg(uint64_t TileNum)
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget)
static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add, const RISCVSubtarget &Subtarget)
Return true if this a load/store that we have a RegRegScale instruction for.
static std::pair< SDValue, SDValue > extractGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, SDValue Pair)
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)
static bool isWorthFoldingAdd(SDValue Add)
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq)
static bool isImplicitDef(SDValue V)
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, std::function< bool(int64_t)> ValidateImm, bool Decrement=false)
static unsigned getSegInstNF(unsigned Intrinsic)
static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget, SDValue Add, SDValue Shift=SDValue())
Is it profitable to fold this Add into RegRegScale load/store.
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, unsigned Bits, const TargetInstrInfo *TII)
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, bool IsPrefetch=false)
#define INST_ALL_NF_CASE_WITH_FF(NAME)
#define CASE_VMSLT_OPCODES(lmulenum, suffix)
static SDValue buildGPRPair(SelectionDAG *CurDAG, const SDLoc &DL, MVT VT, SDValue Lo, SDValue Hi)
bool isRegImmLoadOrStore(SDNode *User, SDValue Add)
static cl::opt< bool > UsePseudoMovImm("riscv-use-rematerializable-movimm", cl::Hidden, cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " "constant materialization"), cl::init(false))
static SDValue findVSplat(SDValue N)
static bool isApplicableToPLIOrPLUI(int Val)
#define INST_ALL_NF_CASE(NAME)
Contains matchers for matching SelectionDAG nodes and values.
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
DEMANGLE_DUMP_METHOD void dump() const
bool isZero() const
Definition APFloat.h:1512
APInt bitcastToAPInt() const
Definition APFloat.h:1408
bool isPosZero() const
Definition APFloat.h:1527
bool isNegZero() const
Definition APFloat.h:1528
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:967
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
LLVM_ABI bool isSplat(unsigned SplatSizeInBits) const
Check if the APInt consists of a repeated bit pattern.
Definition APInt.cpp:630
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
int64_t getSExtValue() const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This class is used to form a handle around another node that is persistent and is updated across invo...
const SDValue & getValue() const
static StringRef getMemConstraintName(ConstraintCode C)
Definition InlineAsm.h:475
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Machine Value Type.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TargetMachine, CodeGenOptLevel OptLevel)
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectNegImm(SDValue N, SDValue &Val)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment)
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool selectZExtImm32(SDValue N, SDValue &Val)
bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount, unsigned Bits, SDValue &Base, SDValue &Index, SDValue &Scale)
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset)
bool selectVMNOT_VLOp(SDNode *Parent, SDValue N, SDValue &Res)
void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
void selectVLSEGFF(SDNode *Node, unsigned NF, bool IsMasked)
bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool orDisjoint(const SDNode *Node) const
bool tryWideningMulAcc(SDNode *Node, const SDLoc &DL)
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool selectInvLogicImm(SDValue N, SDValue &Val)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset)
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
void selectXSfmmVSET(SDNode *Node)
bool trySignedBitfieldInsertInSign(SDNode *Node)
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
bool selectScalarFPAsInt(SDValue N, SDValue &Imm)
bool hasAllBUsers(SDNode *Node) const
void selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
bool selectVMNOTOp(SDValue N, SDValue &Res)
void selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool selectVSplatImm64Neg(SDValue N, SDValue &SplatVal)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
void selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
bool hasShlAdd(int64_t ShAmt) const
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
iterator_range< user_iterator > users()
Definition Value.h:426
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI unsigned encodeXSfmmVType(unsigned SEW, unsigned Widen, bool AltFmt)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
LLVM_ABI unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul)
static unsigned decodeTWiden(unsigned TWiden)
LLVM_ABI unsigned encodeVTYPE(VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic, bool AltFmt=false)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
static const MachineMemOperand::Flags MONontemporalBit1
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
unsigned M1(unsigned Val)
Definition VE.h:377
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:263
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr T maskTrailingZeros(unsigned N)
Create a bitmask with the N right-most bits set to 0, and all other bits set to 1.
Definition MathExtras.h:94
@ Add
Sum of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.