1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
24#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
34 "riscv-use-rematerializable-movimm", cl::Hidden,
35 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
36 "constant materialization"),
37 cl::init(false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
42void RISCVDAGToDAGISel::PreprocessISelDAG() {
43 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
44
45 bool MadeChange = false;
46 while (Position != CurDAG->allnodes_begin()) {
47 SDNode *N = &*--Position;
48 if (N->use_empty())
49 continue;
50
51 SDValue Result;
52 switch (N->getOpcode()) {
53 case ISD::SPLAT_VECTOR: {
54 if (Subtarget->enablePExtCodeGen())
55 break;
56 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
57 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
58 MVT VT = N->getSimpleValueType(0);
59 unsigned Opc =
60 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
61 SDLoc DL(N);
62 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
63 SDValue Src = N->getOperand(0);
64 if (VT.isInteger())
65 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
66 N->getOperand(0));
67 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
68 break;
69 }
70 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
71 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
72 // load. Done after lowering and combining so that we have a chance to
73 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
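// Roughly, the replacement built below has the shape:
//   slot = 8-byte stack temporary
//   store Lo -> slot
//   store Hi -> slot + 4
//   Result = riscv_vlse(Passthru, slot, stride = x0, VL)
// i.e. every element is reloaded from the same 8-byte slot via a stride-0
// strided load.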
74 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
75 MVT VT = N->getSimpleValueType(0);
76 SDValue Passthru = N->getOperand(0);
77 SDValue Lo = N->getOperand(1);
78 SDValue Hi = N->getOperand(2);
79 SDValue VL = N->getOperand(3);
80 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
81 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
82 "Unexpected VTs!");
83 MachineFunction &MF = CurDAG->getMachineFunction();
84 SDLoc DL(N);
85
86 // Create temporary stack for each expanding node.
87 SDValue StackSlot =
88 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
89 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
90 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
91
92 SDValue Chain = CurDAG->getEntryNode();
93 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
94
95 SDValue OffsetSlot =
96 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
97 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
98 Align(8));
99
100 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
101
102 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
103 SDValue IntID =
104 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
105 SDValue Ops[] = {Chain,
106 IntID,
107 Passthru,
108 StackSlot,
109 CurDAG->getRegister(RISCV::X0, MVT::i64),
110 VL};
111
112 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
113 MVT::i64, MPI, Align(8),
114 MachineMemOperand::MOLoad);
115 break;
116 }
117 case ISD::FP_EXTEND: {
118 // We only have vector patterns for riscv_fpextend_vl in isel.
119 SDLoc DL(N);
120 MVT VT = N->getSimpleValueType(0);
121 if (!VT.isVector())
122 break;
123 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
124 SDValue TrueMask = CurDAG->getNode(
125 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
126 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
127 TrueMask, VLMAX);
128 break;
129 }
130 }
131
132 if (Result) {
133 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
134 LLVM_DEBUG(N->dump(CurDAG));
135 LLVM_DEBUG(dbgs() << "\nNew: ");
136 LLVM_DEBUG(Result->dump(CurDAG));
137 LLVM_DEBUG(dbgs() << "\n");
138
139 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
140 MadeChange = true;
141 }
142 }
143
144 if (MadeChange)
145 CurDAG->RemoveDeadNodes();
146}
147
148void RISCVDAGToDAGISel::PostprocessISelDAG() {
149 HandleSDNode Dummy(CurDAG->getRoot());
150 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
151
152 bool MadeChange = false;
153 while (Position != CurDAG->allnodes_begin()) {
154 SDNode *N = &*--Position;
155 // Skip dead nodes and any non-machine opcodes.
156 if (N->use_empty() || !N->isMachineOpcode())
157 continue;
158
159 MadeChange |= doPeepholeSExtW(N);
160
161 // FIXME: This is here only because the VMerge transform doesn't
162 // know how to handle masked true inputs. Once that has been moved
163 // to post-ISEL, this can be deleted as well.
164 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
165 }
166
167 CurDAG->setRoot(Dummy.getValue());
168
169 // After we're done with everything else, convert IMPLICIT_DEF
170 // passthru operands to NoRegister. This is required to workaround
171 // an optimization deficiency in MachineCSE. This really should
172 // be merged back into each of the patterns (i.e. there's no good
173 // reason not to go directly to NoReg), but is being done this way
174 // to allow easy backporting.
175 MadeChange |= doPeepholeNoRegPassThru();
176
177 if (MadeChange)
178 CurDAG->RemoveDeadNodes();
179}
180
181static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
182 RISCVMatInt::InstSeq &Seq) {
183 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
184 for (const RISCVMatInt::Inst &Inst : Seq) {
185 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
186 SDNode *Result = nullptr;
187 switch (Inst.getOpndKind()) {
188 case RISCVMatInt::Imm:
189 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
190 break;
191 case RISCVMatInt::RegX0:
192 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
193 CurDAG->getRegister(RISCV::X0, VT));
194 break;
195 case RISCVMatInt::RegReg:
196 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
197 break;
198 case RISCVMatInt::RegImm:
199 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
200 break;
201 }
202
203 // Only the first instruction has X0 as its source.
204 SrcReg = SDValue(Result, 0);
205 }
206
207 return SrcReg;
208}
209
210static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
211 int64_t Imm, const RISCVSubtarget &Subtarget) {
212 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
213
214 // Use a rematerializable pseudo instruction for short sequences if enabled.
215 if (Seq.size() == 2 && UsePseudoMovImm)
216 return SDValue(
217 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
218 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
219 0);
220
221 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
222 // worst an LUI+ADDIW. This will require an extra register, but avoids a
223 // constant pool.
224 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
225 // low and high 32 bits are the same and bits 31 and 63 are set.
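// Illustrative example for the Zba case (assuming the usual add.uw
// semantics, rd = rs2 + ZEXT(rs1[31:0])): Imm = 0x8123456781234567.
//   Lo  = LUI + ADDIW   ; 0xffffffff81234567
//   t   = SLLI Lo, 32   ; 0x8123456700000000
//   res = ADD_UW Lo, t  ; t + 0x0000000081234567 = Imm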
226 if (Seq.size() > 3) {
227 unsigned ShiftAmt, AddOpc;
228 RISCVMatInt::InstSeq SeqLo =
229 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
230 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
231 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
232
233 SDValue SLLI = SDValue(
234 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
235 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
236 0);
237 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
238 }
239 }
240
241 // Otherwise, use the original sequence.
242 return selectImmSeq(CurDAG, DL, VT, Seq);
243}
244
245void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
246 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
247 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
248 bool IsLoad, MVT *IndexVT) {
249 SDValue Chain = Node->getOperand(0);
250
251 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
252
253 if (IsStridedOrIndexed) {
254 Operands.push_back(Node->getOperand(CurOp++)); // Index.
255 if (IndexVT)
256 *IndexVT = Operands.back()->getSimpleValueType(0);
257 }
258
259 if (IsMasked) {
260 SDValue Mask = Node->getOperand(CurOp++);
261 Operands.push_back(Mask);
262 }
263 SDValue VL;
264 selectVLOp(Node->getOperand(CurOp++), VL);
265 Operands.push_back(VL);
266
267 MVT XLenVT = Subtarget->getXLenVT();
268 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
269 Operands.push_back(SEWOp);
270
271 // At the IR layer, all the masked load intrinsics have policy operands,
272 // none of the others do. All have passthru operands. For our pseudos,
273 // all loads have policy operands.
274 if (IsLoad) {
275 uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
276 if (IsMasked)
277 Policy = Node->getConstantOperandVal(CurOp++);
278 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
279 Operands.push_back(PolicyOp);
280 }
281
282 Operands.push_back(Chain); // Chain.
283}
284
285void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
286 bool IsStrided) {
287 SDLoc DL(Node);
288 MVT VT = Node->getSimpleValueType(0);
289 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
290 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
291
292 unsigned CurOp = 2;
293 SmallVector<SDValue, 8> Operands;
294
295 Operands.push_back(Node->getOperand(CurOp++));
296
297 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
298 Operands, /*IsLoad=*/true);
299
300 const RISCV::VLSEGPseudo *P =
301 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
302 static_cast<unsigned>(LMUL));
303 MachineSDNode *Load =
304 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
305
306 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
307
308 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
309 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
310 CurDAG->RemoveDeadNode(Node);
311}
312
313void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
314 bool IsMasked) {
315 SDLoc DL(Node);
316 MVT VT = Node->getSimpleValueType(0);
317 MVT XLenVT = Subtarget->getXLenVT();
318 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
319 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
320
321 unsigned CurOp = 2;
322 SmallVector<SDValue, 8> Operands;
323
324 Operands.push_back(Node->getOperand(CurOp++));
325
326 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
327 /*IsStridedOrIndexed*/ false, Operands,
328 /*IsLoad=*/true);
329
330 const RISCV::VLSEGPseudo *P =
331 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
332 Log2SEW, static_cast<unsigned>(LMUL));
333 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
334 XLenVT, MVT::Other, Operands);
335
336 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
337
338 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
339 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
340 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
341 CurDAG->RemoveDeadNode(Node);
342}
343
344void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
345 bool IsOrdered) {
346 SDLoc DL(Node);
347 MVT VT = Node->getSimpleValueType(0);
348 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
349 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
350
351 unsigned CurOp = 2;
352 SmallVector<SDValue, 8> Operands;
353
354 Operands.push_back(Node->getOperand(CurOp++));
355
356 MVT IndexVT;
357 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
358 /*IsStridedOrIndexed*/ true, Operands,
359 /*IsLoad=*/true, &IndexVT);
360
361#ifndef NDEBUG
362 // Number of elements = RVVBitsPerBlock * LMUL / SEW
363 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
364 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
365 if (DecodedLMUL.second)
366 ContainedTyNumElts /= DecodedLMUL.first;
367 else
368 ContainedTyNumElts *= DecodedLMUL.first;
369 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
370 "Element count mismatch");
371#endif
372
373 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
374 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
375 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
376 reportFatalUsageError("The V extension does not support EEW=64 for index "
377 "values when XLEN=32");
378 }
379 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
380 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
381 static_cast<unsigned>(IndexLMUL));
382 MachineSDNode *Load =
383 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
384
385 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
386
387 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
388 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
389 CurDAG->RemoveDeadNode(Node);
390}
391
392void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
393 bool IsStrided) {
394 SDLoc DL(Node);
395 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
396 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
397 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
398
399 unsigned CurOp = 2;
400 SmallVector<SDValue, 8> Operands;
401
402 Operands.push_back(Node->getOperand(CurOp++));
403
404 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
405 Operands);
406
407 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
408 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
409 MachineSDNode *Store =
410 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
411
412 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
413
414 ReplaceNode(Node, Store);
415}
416
417void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
418 bool IsOrdered) {
419 SDLoc DL(Node);
420 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
421 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
422 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
423
424 unsigned CurOp = 2;
425 SmallVector<SDValue, 8> Operands;
426
427 Operands.push_back(Node->getOperand(CurOp++));
428
429 MVT IndexVT;
430 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
431 /*IsStridedOrIndexed*/ true, Operands,
432 /*IsLoad=*/false, &IndexVT);
433
434#ifndef NDEBUG
435 // Number of elements = RVVBitsPerBlock * LMUL / SEW
436 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
437 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
438 if (DecodedLMUL.second)
439 ContainedTyNumElts /= DecodedLMUL.first;
440 else
441 ContainedTyNumElts *= DecodedLMUL.first;
442 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
443 "Element count mismatch");
444#endif
445
446 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
447 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
448 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449 reportFatalUsageError("The V extension does not support EEW=64 for index "
450 "values when XLEN=32");
451 }
452 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
453 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454 static_cast<unsigned>(IndexLMUL));
455 MachineSDNode *Store =
456 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
457
458 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
459
460 ReplaceNode(Node, Store);
461}
462
463void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
464 if (!Subtarget->hasVInstructions())
465 return;
466
467 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
468
469 SDLoc DL(Node);
470 MVT XLenVT = Subtarget->getXLenVT();
471
472 unsigned IntNo = Node->getConstantOperandVal(0);
473
474 assert((IntNo == Intrinsic::riscv_vsetvli ||
475 IntNo == Intrinsic::riscv_vsetvlimax) &&
476 "Unexpected vsetvli intrinsic");
477
478 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
479 unsigned Offset = (VLMax ? 1 : 2);
480
481 assert(Node->getNumOperands() == Offset + 2 &&
482 "Unexpected number of operands");
483
484 unsigned SEW =
485 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
486 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
487 Node->getConstantOperandVal(Offset + 1) & 0x7);
488
489 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
490 /*MaskAgnostic*/ true);
491 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
492
493 SDValue VLOperand;
494 unsigned Opcode = RISCV::PseudoVSETVLI;
495 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
496 if (auto VLEN = Subtarget->getRealVLen())
497 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
498 VLMax = true;
499 }
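// For example, with a known VLEN of 128, SEW=32 and LMUL=1 give
// VLMAX = 128 / 32 = 4, so a constant AVL of 4 is equivalent to requesting
// VLMAX and can use the X0 form selected below.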
500 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
501 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
502 Opcode = RISCV::PseudoVSETVLIX0;
503 } else {
504 VLOperand = Node->getOperand(1);
505
506 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
507 uint64_t AVL = C->getZExtValue();
508 if (isUInt<5>(AVL)) {
509 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
510 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
511 XLenVT, VLImm, VTypeIOp));
512 return;
513 }
514 }
515 }
516
517 ReplaceNode(Node,
518 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
519}
520
522 if (!Subtarget->hasVendorXSfmmbase())
523 return;
524
525 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
526
527 SDLoc DL(Node);
528 MVT XLenVT = Subtarget->getXLenVT();
529
530 unsigned IntNo = Node->getConstantOperandVal(0);
531
532 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
533 IntNo == Intrinsic::riscv_sf_vsettm ||
534 IntNo == Intrinsic::riscv_sf_vsettk) &&
535 "Unexpected XSfmm vset intrinsic");
536
537 unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
538 unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
539 unsigned PseudoOpCode =
540 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
541 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
542 : RISCV::PseudoSF_VSETTK;
543
544 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
545 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0);
546 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
547
548 ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
549 Node->getOperand(1), VTypeIOp));
550 } else {
551 SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT);
552 SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT);
553 ReplaceNode(Node,
554 CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
555 Node->getOperand(1), Log2SEW, TWiden));
556 }
557}
558
559bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
560 MVT VT = Node->getSimpleValueType(0);
561 unsigned Opcode = Node->getOpcode();
562 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
563 "Unexpected opcode");
564 SDLoc DL(Node);
565
566 // For operations of the form (x << C1) op C2, check if we can use
567 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
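// For example, (and (shl x, 8), 0x7f00): 0x7f00 is not a valid 12-bit
// immediate, but 0x7f00 >> 8 = 0x7f is, so this can be selected as
// (slli (andi x, 0x7f), 8) instead of materializing 0x7f00 in a register.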
568 SDValue N0 = Node->getOperand(0);
569 SDValue N1 = Node->getOperand(1);
570
571 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
572 if (!Cst)
573 return false;
574
575 int64_t Val = Cst->getSExtValue();
576
577 // Check if immediate can already use ANDI/ORI/XORI.
578 if (isInt<12>(Val))
579 return false;
580
581 SDValue Shift = N0;
582
583 // If Val is simm32 and we have a sext_inreg from i32, then the binop
584 // produces at least 33 sign bits. We can peek through the sext_inreg and use
585 // a SLLIW at the end.
586 bool SignExt = false;
587 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
588 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
589 SignExt = true;
590 Shift = N0.getOperand(0);
591 }
592
593 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
594 return false;
595
596 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
597 if (!ShlCst)
598 return false;
599
600 uint64_t ShAmt = ShlCst->getZExtValue();
601
602 // Make sure that we don't change the operation by removing bits.
603 // This only matters for OR and XOR, AND is unaffected.
604 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
605 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
606 return false;
607
608 int64_t ShiftedVal = Val >> ShAmt;
609 if (!isInt<12>(ShiftedVal))
610 return false;
611
612 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
613 if (SignExt && ShAmt >= 32)
614 return false;
615
616 // Ok, we can reorder to get a smaller immediate.
617 unsigned BinOpc;
618 switch (Opcode) {
619 default: llvm_unreachable("Unexpected opcode");
620 case ISD::AND: BinOpc = RISCV::ANDI; break;
621 case ISD::OR: BinOpc = RISCV::ORI; break;
622 case ISD::XOR: BinOpc = RISCV::XORI; break;
623 }
624
625 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
626
627 SDNode *BinOp = CurDAG->getMachineNode(
628 BinOpc, DL, VT, Shift.getOperand(0),
629 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
630 SDNode *SLLI =
631 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
632 CurDAG->getTargetConstant(ShAmt, DL, VT));
633 ReplaceNode(Node, SLLI);
634 return true;
635}
636
637bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
638 unsigned Opc;
639
640 if (Subtarget->hasVendorXTHeadBb())
641 Opc = RISCV::TH_EXT;
642 else if (Subtarget->hasVendorXAndesPerf())
643 Opc = RISCV::NDS_BFOS;
644 else if (Subtarget->hasVendorXqcibm())
645 Opc = RISCV::QC_EXT;
646 else
647 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
648 return false;
649
650 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
651 if (!N1C)
652 return false;
653
654 SDValue N0 = Node->getOperand(0);
655 if (!N0.hasOneUse())
656 return false;
657
658 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
659 const SDLoc &DL, MVT VT) {
660 if (Opc == RISCV::QC_EXT) {
661 // QC.EXT X, width, shamt
662 // shamt is the same as Lsb
663 // width is the number of bits to extract from the Lsb
664 Msb = Msb - Lsb + 1;
665 }
666 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
667 CurDAG->getTargetConstant(Msb, DL, VT),
668 CurDAG->getTargetConstant(Lsb, DL, VT));
669 };
670
671 SDLoc DL(Node);
672 MVT VT = Node->getSimpleValueType(0);
673 const unsigned RightShAmt = N1C->getZExtValue();
674
675 // Transform (sra (shl X, C1) C2) with C1 < C2
676 // -> (SignedBitfieldExtract X, msb, lsb)
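// For example, with XLen=32, C1=8 and C2=12: MsbPlusOne = 32 - 8 = 24, so
// Msb = 23 and Lsb = 12 - 8 = 4, i.e. the bitfield X[23:4] is extracted and
// sign-extended from bit 23.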
677 if (N0.getOpcode() == ISD::SHL) {
678 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
679 if (!N01C)
680 return false;
681
682 const unsigned LeftShAmt = N01C->getZExtValue();
683 // Make sure that this is a bitfield extraction (i.e., the shift-right
684 // amount can not be less than the left-shift).
685 if (LeftShAmt > RightShAmt)
686 return false;
687
688 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
689 const unsigned Msb = MsbPlusOne - 1;
690 const unsigned Lsb = RightShAmt - LeftShAmt;
691
692 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
693 ReplaceNode(Node, Sbe);
694 return true;
695 }
696
697 // Transform (sra (sext_inreg X, _), C) ->
698 // (SignedBitfieldExtract X, msb, lsb)
699 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
700 unsigned ExtSize =
701 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
702
703 // ExtSize of 32 should use sraiw via tablegen pattern.
704 if (ExtSize == 32)
705 return false;
706
707 const unsigned Msb = ExtSize - 1;
708 // If the shift-right amount is greater than Msb, the operation extracts
709 // the X[Msb] bit and sign-extends it.
710 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
711
712 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
713 ReplaceNode(Node, Sbe);
714 return true;
715 }
716
717 return false;
718}
719
720bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
721 // Only supported with XAndesPerf at the moment.
722 if (!Subtarget->hasVendorXAndesPerf())
723 return false;
724
725 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
726 if (!N1C)
727 return false;
728
729 SDValue N0 = Node->getOperand(0);
730 if (!N0.hasOneUse())
731 return false;
732
733 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
734 const SDLoc &DL, MVT VT) {
735 unsigned Opc = RISCV::NDS_BFOS;
736 // If the Lsb is equal to the Msb, then the Lsb should be 0.
737 if (Lsb == Msb)
738 Lsb = 0;
739 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
740 CurDAG->getTargetConstant(Lsb, DL, VT),
741 CurDAG->getTargetConstant(Msb, DL, VT));
742 };
743
744 SDLoc DL(Node);
745 MVT VT = Node->getSimpleValueType(0);
746 const unsigned RightShAmt = N1C->getZExtValue();
747
748 // Transform (sra (shl X, C1) C2) with C1 > C2
749 // -> (NDS.BFOS X, lsb, msb)
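// For example, with XLen=32, C1=20 and C2=8: Msb = 32 - 8 - 1 = 23 and
// Lsb = 20 - 8 = 12, i.e. X[11:0] is placed at bits [23:12], sign-extended
// above bit 23 and zero below bit 12.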
750 if (N0.getOpcode() == ISD::SHL) {
751 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
752 if (!N01C)
753 return false;
754
755 const unsigned LeftShAmt = N01C->getZExtValue();
756 // Make sure that this is a bitfield insertion (i.e., the shift-right
757 // amount should be less than the left-shift).
758 if (LeftShAmt <= RightShAmt)
759 return false;
760
761 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
762 const unsigned Msb = MsbPlusOne - 1;
763 const unsigned Lsb = LeftShAmt - RightShAmt;
764
765 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
766 ReplaceNode(Node, Sbi);
767 return true;
768 }
769
770 return false;
771}
772
773bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
774 const SDLoc &DL, MVT VT,
775 SDValue X, unsigned Msb,
776 unsigned Lsb) {
777 unsigned Opc;
778
779 if (Subtarget->hasVendorXTHeadBb()) {
780 Opc = RISCV::TH_EXTU;
781 } else if (Subtarget->hasVendorXAndesPerf()) {
782 Opc = RISCV::NDS_BFOZ;
783 } else if (Subtarget->hasVendorXqcibm()) {
784 Opc = RISCV::QC_EXTU;
785 // QC.EXTU X, width, shamt
786 // shamt is the same as Lsb
787 // width is the number of bits to extract from the Lsb
788 Msb = Msb - Lsb + 1;
789 } else {
790 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
791 return false;
792 }
793
794 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
795 CurDAG->getTargetConstant(Msb, DL, VT),
796 CurDAG->getTargetConstant(Lsb, DL, VT));
797 ReplaceNode(Node, Ube);
798 return true;
799}
800
801bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
802 const SDLoc &DL, MVT VT,
803 SDValue X, unsigned Msb,
804 unsigned Lsb) {
805 // Only supported with XAndesPerf at the moment.
806 if (!Subtarget->hasVendorXAndesPerf())
807 return false;
808
809 unsigned Opc = RISCV::NDS_BFOZ;
810
811 // If the Lsb is equal to the Msb, then the Lsb should be 0.
812 if (Lsb == Msb)
813 Lsb = 0;
814 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
815 CurDAG->getTargetConstant(Lsb, DL, VT),
816 CurDAG->getTargetConstant(Msb, DL, VT));
817 ReplaceNode(Node, Ubi);
818 return true;
819}
820
821bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
822 // Target does not support indexed loads.
823 if (!Subtarget->hasVendorXTHeadMemIdx())
824 return false;
825
826 LoadSDNode *Ld = cast<LoadSDNode>(Node);
827 ISD::MemIndexedMode AM = Ld->getAddressingMode();
828 if (AM == ISD::UNINDEXED)
829 return false;
830
831 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
832 if (!C)
833 return false;
834
835 EVT LoadVT = Ld->getMemoryVT();
836 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
837 "Unexpected addressing mode");
838 bool IsPre = AM == ISD::PRE_INC;
839 bool IsPost = AM == ISD::POST_INC;
840 int64_t Offset = C->getSExtValue();
841
842 // The constants that can be encoded in the THeadMemIdx instructions
843 // are of the form (sign_extend(imm5) << imm2).
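// For example, Offset = 40 encodes as imm5 = 5 with imm2 = 3 (5 << 3) and
// Offset = -48 as imm5 = -6 with imm2 = 3, while Offset = 100 has no
// (sign_extend(imm5) << imm2) form with imm2 < 4 and is rejected below.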
844 unsigned Shift;
845 for (Shift = 0; Shift < 4; Shift++)
846 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
847 break;
848
849 // Constant cannot be encoded.
850 if (Shift == 4)
851 return false;
852
853 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
854 unsigned Opcode;
855 if (LoadVT == MVT::i8 && IsPre)
856 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
857 else if (LoadVT == MVT::i8 && IsPost)
858 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
859 else if (LoadVT == MVT::i16 && IsPre)
860 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
861 else if (LoadVT == MVT::i16 && IsPost)
862 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
863 else if (LoadVT == MVT::i32 && IsPre)
864 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
865 else if (LoadVT == MVT::i32 && IsPost)
866 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
867 else if (LoadVT == MVT::i64 && IsPre)
868 Opcode = RISCV::TH_LDIB;
869 else if (LoadVT == MVT::i64 && IsPost)
870 Opcode = RISCV::TH_LDIA;
871 else
872 return false;
873
874 EVT Ty = Ld->getOffset().getValueType();
875 SDValue Ops[] = {
876 Ld->getBasePtr(),
877 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
878 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
879 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
880 Ld->getValueType(1), MVT::Other, Ops);
881
882 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
883 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
884
885 ReplaceNode(Node, New);
886
887 return true;
888}
889
890static Register getTileReg(uint64_t TileNum) {
891 assert(TileNum <= 15 && "Invalid tile number");
892 return RISCV::T0 + TileNum;
893}
894
895void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
896 if (!Subtarget->hasVInstructions())
897 return;
898
899 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
900
901 SDLoc DL(Node);
902 unsigned IntNo = Node->getConstantOperandVal(1);
903
904 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
905 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
906 "Unexpected vsetvli intrinsic");
907
908 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
909 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
910 SDValue SEWOp =
911 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
912 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
913 Node->getOperand(4), Node->getOperand(5),
914 Node->getOperand(8), SEWOp,
915 Node->getOperand(0)};
916
917 unsigned Opcode;
918 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
919 switch (LMulSDNode->getSExtValue()) {
920 case 5:
921 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
922 : RISCV::PseudoSF_VC_I_SE_MF8;
923 break;
924 case 6:
925 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
926 : RISCV::PseudoSF_VC_I_SE_MF4;
927 break;
928 case 7:
929 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
930 : RISCV::PseudoSF_VC_I_SE_MF2;
931 break;
932 case 0:
933 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
934 : RISCV::PseudoSF_VC_I_SE_M1;
935 break;
936 case 1:
937 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
938 : RISCV::PseudoSF_VC_I_SE_M2;
939 break;
940 case 2:
941 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
942 : RISCV::PseudoSF_VC_I_SE_M4;
943 break;
944 case 3:
945 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
946 : RISCV::PseudoSF_VC_I_SE_M8;
947 break;
948 }
949
950 ReplaceNode(Node, CurDAG->getMachineNode(
951 Opcode, DL, Node->getSimpleValueType(0), Operands));
952}
953
954static unsigned getSegInstNF(unsigned Intrinsic) {
955#define INST_NF_CASE(NAME, NF) \
956 case Intrinsic::riscv_##NAME##NF: \
957 return NF;
958#define INST_NF_CASE_MASK(NAME, NF) \
959 case Intrinsic::riscv_##NAME##NF##_mask: \
960 return NF;
961#define INST_NF_CASE_FF(NAME, NF) \
962 case Intrinsic::riscv_##NAME##NF##ff: \
963 return NF;
964#define INST_NF_CASE_FF_MASK(NAME, NF) \
965 case Intrinsic::riscv_##NAME##NF##ff_mask: \
966 return NF;
967#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
968 MACRO_NAME(NAME, 2) \
969 MACRO_NAME(NAME, 3) \
970 MACRO_NAME(NAME, 4) \
971 MACRO_NAME(NAME, 5) \
972 MACRO_NAME(NAME, 6) \
973 MACRO_NAME(NAME, 7) \
974 MACRO_NAME(NAME, 8)
975#define INST_ALL_NF_CASE(NAME) \
976 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
977 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
978#define INST_ALL_NF_CASE_WITH_FF(NAME) \
979 INST_ALL_NF_CASE(NAME) \
980 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
981 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
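// For example, INST_ALL_NF_CASE_WITH_FF(vlseg) expands to case labels for
// Intrinsic::riscv_vlseg2..riscv_vlseg8 and their _mask, ff and ff_mask
// variants, each returning its NF value (2..8).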
982 switch (Intrinsic) {
983 default:
984 llvm_unreachable("Unexpected segment load/store intrinsic");
985 INST_ALL_NF_CASE_WITH_FF(vlseg)
986 INST_ALL_NF_CASE(vlsseg)
987 INST_ALL_NF_CASE(vloxseg)
988 INST_ALL_NF_CASE(vluxseg)
989 INST_ALL_NF_CASE(vsseg)
990 INST_ALL_NF_CASE(vssseg)
991 INST_ALL_NF_CASE(vsoxseg)
992 INST_ALL_NF_CASE(vsuxseg)
993 }
994}
995
996static bool isApplicableToPLI(int Val) {
997 // Check if the immediate is packed i8 or i10
998 int16_t Bit31To16 = Val >> 16;
999 int16_t Bit15To0 = Val;
1000 int8_t Bit15To8 = Bit15To0 >> 8;
1001 int8_t Bit7To0 = Val;
1002 if (Bit31To16 != Bit15To0)
1003 return false;
1004
1005 return isInt<10>(Bit31To16) || Bit15To8 == Bit7To0;
1006}
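// For example, 0x01230123 repeats the halfword 0x0123 (291 fits in i10, so
// PLI.H applies) and 0x7c7c7c7c repeats the byte 0x7c (PLI.B applies),
// while 0x12341234 repeats a halfword that neither fits in i10 nor has
// equal bytes, so it is rejected.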
1007
1008void RISCVDAGToDAGISel::Select(SDNode *Node) {
1009 // If we have a custom node, we have already selected.
1010 if (Node->isMachineOpcode()) {
1011 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1012 Node->setNodeId(-1);
1013 return;
1014 }
1015
1016 // Instruction Selection not handled by the auto-generated tablegen selection
1017 // should be handled here.
1018 unsigned Opcode = Node->getOpcode();
1019 MVT XLenVT = Subtarget->getXLenVT();
1020 SDLoc DL(Node);
1021 MVT VT = Node->getSimpleValueType(0);
1022
1023 bool HasBitTest = Subtarget->hasBEXTILike();
1024
1025 switch (Opcode) {
1026 case ISD::Constant: {
1027 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
1028 auto *ConstNode = cast<ConstantSDNode>(Node);
1029 if (ConstNode->isZero()) {
1030 SDValue New =
1031 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1032 ReplaceNode(Node, New.getNode());
1033 return;
1034 }
1035 int64_t Imm = ConstNode->getSExtValue();
1036 // If only the lower 8 bits are used, try to convert this to a simm6 by
1037 // sign-extending bit 7. This is neutral without the C extension, and
1038 // allows C.LI to be used if C is present.
1039 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
1040 Imm = SignExtend64<8>(Imm);
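// For example, Imm = 0xF0 with only byte-wide users: SignExtend64<8>(0xF0)
// is -16, which fits in a 6-bit immediate (C.LI), and the users still see
// the same low 8 bits.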
1041 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1042 // by sign extending bit 15.
1043 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
1044 hasAllHUsers(Node))
1045 Imm = SignExtend64<16>(Imm);
1046 // If the upper 32-bits are not used try to convert this into a simm32 by
1047 // sign extending bit 32.
1048 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1049 Imm = SignExtend64<32>(Imm);
1050
1051 if (Subtarget->enablePExtCodeGen() && isApplicableToPLI(Imm) &&
1052 hasAllWUsers(Node)) {
1053 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers, we
1054 // can simply copy lower 32 bits to higher 32 bits to make it able to
1055 // rematerialize to PLI_B or PLI_H
1056 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1057 }
1058
1059 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1060 return;
1061 }
1062 case ISD::ConstantFP: {
1063 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1064
1065 bool Is64Bit = Subtarget->is64Bit();
1066 bool HasZdinx = Subtarget->hasStdExtZdinx();
1067
1068 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1069 SDValue Imm;
1070 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1071 // create an integer immediate.
1072 if (APF.isPosZero() || NegZeroF64) {
1073 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1074 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1075 else
1076 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1077 } else {
1078 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1079 *Subtarget);
1080 }
1081
1082 unsigned Opc;
1083 switch (VT.SimpleTy) {
1084 default:
1085 llvm_unreachable("Unexpected size");
1086 case MVT::bf16:
1087 assert(Subtarget->hasStdExtZfbfmin());
1088 Opc = RISCV::FMV_H_X;
1089 break;
1090 case MVT::f16:
1091 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1092 break;
1093 case MVT::f32:
1094 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1095 break;
1096 case MVT::f64:
1097 // For RV32, we can't move from a GPR, we need to convert instead. This
1098 // should only happen for +0.0 and -0.0.
1099 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1100 if (HasZdinx)
1101 Opc = RISCV::COPY;
1102 else
1103 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1104 break;
1105 }
1106
1107 SDNode *Res;
1108 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1109 Res =
1110 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1111 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1112 Res =
1113 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1114 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1115 Res = CurDAG->getMachineNode(
1116 Opc, DL, VT, Imm,
1117 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1118 else
1119 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1120
1121 // For f64 -0.0, we need to insert a fneg.d idiom.
1122 if (NegZeroF64) {
1123 Opc = RISCV::FSGNJN_D;
1124 if (HasZdinx)
1125 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1126 Res =
1127 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1128 }
1129
1130 ReplaceNode(Node, Res);
1131 return;
1132 }
1133 case RISCVISD::BuildGPRPair:
1134 case RISCVISD::BuildPairF64: {
1135 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1136 break;
1137
1138 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1139 "BuildPairF64 only handled here on rv32i_zdinx");
1140
1141 SDValue Ops[] = {
1142 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1143 Node->getOperand(0),
1144 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1145 Node->getOperand(1),
1146 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1147
1148 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
1149 ReplaceNode(Node, N);
1150 return;
1151 }
1152 case RISCVISD::SplitGPRPair:
1153 case RISCVISD::SplitF64: {
1154 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1155 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1156 "SplitF64 only handled here on rv32i_zdinx");
1157
1158 if (!SDValue(Node, 0).use_empty()) {
1159 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1160 Node->getValueType(0),
1161 Node->getOperand(0));
1162 ReplaceUses(SDValue(Node, 0), Lo);
1163 }
1164
1165 if (!SDValue(Node, 1).use_empty()) {
1166 SDValue Hi = CurDAG->getTargetExtractSubreg(
1167 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1168 ReplaceUses(SDValue(Node, 1), Hi);
1169 }
1170
1171 CurDAG->RemoveDeadNode(Node);
1172 return;
1173 }
1174
1175 assert(Opcode != RISCVISD::SplitGPRPair &&
1176 "SplitGPRPair should already be handled");
1177
1178 if (!Subtarget->hasStdExtZfa())
1179 break;
1180 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1181 "Unexpected subtarget");
1182
1183 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1184 if (!SDValue(Node, 0).use_empty()) {
1185 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1186 Node->getOperand(0));
1187 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1188 }
1189 if (!SDValue(Node, 1).use_empty()) {
1190 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1191 Node->getOperand(0));
1192 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1193 }
1194
1195 CurDAG->RemoveDeadNode(Node);
1196 return;
1197 }
1198 case ISD::SHL: {
1199 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1200 if (!N1C)
1201 break;
1202 SDValue N0 = Node->getOperand(0);
1203 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1204 !isa<ConstantSDNode>(N0.getOperand(1)))
1205 break;
1206 unsigned ShAmt = N1C->getZExtValue();
1207 uint64_t Mask = N0.getConstantOperandVal(1);
1208
1209 if (isShiftedMask_64(Mask)) {
1210 unsigned XLen = Subtarget->getXLen();
1211 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1212 unsigned TrailingZeros = llvm::countr_zero(Mask);
1213 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1214 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1215 // where C2 has 32 leading zeros and C3 trailing zeros.
1216 SDNode *SRLIW = CurDAG->getMachineNode(
1217 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1218 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1219 SDNode *SLLI = CurDAG->getMachineNode(
1220 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1221 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1222 ReplaceNode(Node, SLLI);
1223 return;
1224 }
1225 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1226 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1227 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1228 // where C2 has C4 leading zeros and no trailing zeros.
1229 // This is profitable if the "and" was to be lowered to
1230 // (srli (slli X, C4), C4) and not (andi X, C2).
1231 // For "LeadingZeros == 32":
1232 // - with Zba it's just (slli.uw X, C)
1233 // - without Zba a tablegen pattern applies the very same
1234 // transform as we would have done here
1235 SDNode *SLLI = CurDAG->getMachineNode(
1236 RISCV::SLLI, DL, VT, N0.getOperand(0),
1237 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1238 SDNode *SRLI = CurDAG->getMachineNode(
1239 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1240 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1241 ReplaceNode(Node, SRLI);
1242 return;
1243 }
1244 }
1245 break;
1246 }
1247 case ISD::SRL: {
1248 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1249 if (!N1C)
1250 break;
1251 SDValue N0 = Node->getOperand(0);
1252 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1253 break;
1254 unsigned ShAmt = N1C->getZExtValue();
1255 uint64_t Mask = N0.getConstantOperandVal(1);
1256
1257 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1258 // 32 leading zeros and C3 trailing zeros.
1259 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1260 unsigned XLen = Subtarget->getXLen();
1261 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1262 unsigned TrailingZeros = llvm::countr_zero(Mask);
1263 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1264 SDNode *SRLIW = CurDAG->getMachineNode(
1265 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1266 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1267 SDNode *SLLI = CurDAG->getMachineNode(
1268 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1269 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1270 ReplaceNode(Node, SLLI);
1271 return;
1272 }
1273 }
1274
1275 // Optimize (srl (and X, C2), C) ->
1276 // (srli (slli X, (XLen-C3)), (XLen-C3) + C)
1277 // Where C2 is a mask with C3 trailing ones.
1278 // Taking into account that the C2 may have had lower bits unset by
1279 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1280 // This pattern occurs when type legalizing right shifts for types with
1281 // less than XLen bits.
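// For example, on RV64 with C2 = 0xFFFF (C3 = 16 trailing ones) and C = 4,
// (srl (and X, 0xFFFF), 4) becomes (srli (slli X, 48), 52), i.e. X[15:4]
// zero-extended, without materializing the 0xFFFF mask.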
1282 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1283 if (!isMask_64(Mask))
1284 break;
1285 unsigned TrailingOnes = llvm::countr_one(Mask);
1286 if (ShAmt >= TrailingOnes)
1287 break;
1288 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1289 if (TrailingOnes == 32) {
1290 SDNode *SRLI = CurDAG->getMachineNode(
1291 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1292 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1293 ReplaceNode(Node, SRLI);
1294 return;
1295 }
1296
1297 // Only do the remaining transforms if the AND has one use.
1298 if (!N0.hasOneUse())
1299 break;
1300
1301 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1302 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1303 SDNode *BEXTI = CurDAG->getMachineNode(
1304 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1305 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1306 ReplaceNode(Node, BEXTI);
1307 return;
1308 }
1309
1310 const unsigned Msb = TrailingOnes - 1;
1311 const unsigned Lsb = ShAmt;
1312 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
1313 return;
1314
1315 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1316 SDNode *SLLI =
1317 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1318 CurDAG->getTargetConstant(LShAmt, DL, VT));
1319 SDNode *SRLI = CurDAG->getMachineNode(
1320 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1321 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1322 ReplaceNode(Node, SRLI);
1323 return;
1324 }
1325 case ISD::SRA: {
1326 if (trySignedBitfieldExtract(Node))
1327 return;
1328
1329 if (trySignedBitfieldInsertInSign(Node))
1330 return;
1331
1332 // Optimize (sra (sext_inreg X, i16), C) ->
1333 // (srai (slli X, (XLen-16)), (XLen-16) + C)
1334 // And (sra (sext_inreg X, i8), C) ->
1335 // (srai (slli X, (XLen-8)), (XLen-8) + C)
1336 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1337 // This transform matches the code we get without Zbb. The shifts are more
1338 // compressible, and this can help expose CSE opportunities in the sdiv by
1339 // constant optimization.
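// For example, on RV64, (sra (sext_inreg X, i16), 3) becomes
// (srai (slli X, 48), 51): ExtSize = 16 gives LShAmt = 64 - 16 = 48 and a
// final shift amount of 48 + 3 = 51.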
1340 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1341 if (!N1C)
1342 break;
1343 SDValue N0 = Node->getOperand(0);
1344 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1345 break;
1346 unsigned ShAmt = N1C->getZExtValue();
1347 unsigned ExtSize =
1348 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1349 // ExtSize of 32 should use sraiw via tablegen pattern.
1350 if (ExtSize >= 32 || ShAmt >= ExtSize)
1351 break;
1352 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1353 SDNode *SLLI =
1354 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1355 CurDAG->getTargetConstant(LShAmt, DL, VT));
1356 SDNode *SRAI = CurDAG->getMachineNode(
1357 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1358 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1359 ReplaceNode(Node, SRAI);
1360 return;
1361 }
1362 case ISD::OR: {
1363 if (tryShrinkShlLogicImm(Node))
1364 return;
1365
1366 break;
1367 }
1368 case ISD::XOR:
1369 if (tryShrinkShlLogicImm(Node))
1370 return;
1371
1372 break;
1373 case ISD::AND: {
1374 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1375 if (!N1C)
1376 break;
1377
1378 SDValue N0 = Node->getOperand(0);
1379
1380 bool LeftShift = N0.getOpcode() == ISD::SHL;
1381 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1382 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1383 if (!C)
1384 break;
1385 unsigned C2 = C->getZExtValue();
1386 unsigned XLen = Subtarget->getXLen();
1387 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1388
1389 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1390 // shift pair might offer more compression opportunities.
1391 // TODO: We could check for C extension here, but we don't have many lit
1392 // tests with the C extension enabled so not checking gets better
1393 // coverage.
1394 // TODO: What if ANDI faster than shift?
1395 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1396
1397 uint64_t C1 = N1C->getZExtValue();
1398
1399 // Clear irrelevant bits in the mask.
1400 if (LeftShift)
1401 C1 &= maskTrailingZeros<uint64_t>(C2);
1402 else
1403 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1404
1405 // Some transforms should only be done if the shift has a single use or
1406 // the AND would become (srli (slli X, 32), 32)
1407 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1408
1409 SDValue X = N0.getOperand(0);
1410
1411 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1412 // with c3 leading zeros.
1413 if (!LeftShift && isMask_64(C1)) {
1414 unsigned Leading = XLen - llvm::bit_width(C1);
1415 if (C2 < Leading) {
1416 // If the number of leading zeros is C2+32 this can be SRLIW.
1417 if (C2 + 32 == Leading) {
1418 SDNode *SRLIW = CurDAG->getMachineNode(
1419 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1420 ReplaceNode(Node, SRLIW);
1421 return;
1422 }
1423
1424 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1425 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1426 //
1427 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1428 // legalized and goes through DAG combine.
1429 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1430 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1431 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1432 SDNode *SRAIW =
1433 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1434 CurDAG->getTargetConstant(31, DL, VT));
1435 SDNode *SRLIW = CurDAG->getMachineNode(
1436 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1437 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1438 ReplaceNode(Node, SRLIW);
1439 return;
1440 }
1441
1442 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1443 // available.
1444 // Transform (and (srl x, C2), C1)
1445 // -> (<bfextract> x, msb, lsb)
1446 //
1447 // Make sure to keep this below the SRLIW cases, as we always want to
1448 // prefer the more common instruction.
1449 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1450 const unsigned Lsb = C2;
1451 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1452 return;
1453
1454 // (srli (slli x, c3-c2), c3).
1455 // Skip if we could use (zext.w (sraiw X, C2)).
1456 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1457 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1458 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1459 // Also Skip if we can use bexti or th.tst.
1460 Skip |= HasBitTest && Leading == XLen - 1;
1461 if (OneUseOrZExtW && !Skip) {
1462 SDNode *SLLI = CurDAG->getMachineNode(
1463 RISCV::SLLI, DL, VT, X,
1464 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1465 SDNode *SRLI = CurDAG->getMachineNode(
1466 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1467 CurDAG->getTargetConstant(Leading, DL, VT));
1468 ReplaceNode(Node, SRLI);
1469 return;
1470 }
1471 }
1472 }
1473
1474 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1475 // shifted by c2 bits with c3 leading zeros.
1476 if (LeftShift && isShiftedMask_64(C1)) {
1477 unsigned Leading = XLen - llvm::bit_width(C1);
1478
1479 if (C2 + Leading < XLen &&
1480 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1481 // Use slli.uw when possible.
1482 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1483 SDNode *SLLI_UW =
1484 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1485 CurDAG->getTargetConstant(C2, DL, VT));
1486 ReplaceNode(Node, SLLI_UW);
1487 return;
1488 }
1489
1490 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1491 // available.
1492 // Transform (and (shl x, c2), c1)
1493 // -> (<bfinsert> x, msb, lsb)
1494 // e.g.
1495 // (and (shl x, 12), 0x00fff000)
1496 // If XLen = 32 and C2 = 12, then
1497 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1498 const unsigned Msb = XLen - Leading - 1;
1499 const unsigned Lsb = C2;
1500 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1501 return;
1502
1503 // (srli (slli x, c2+c3), c3)
1504 if (OneUseOrZExtW && !IsCANDI) {
1505 SDNode *SLLI = CurDAG->getMachineNode(
1506 RISCV::SLLI, DL, VT, X,
1507 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1508 SDNode *SRLI = CurDAG->getMachineNode(
1509 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1510 CurDAG->getTargetConstant(Leading, DL, VT));
1511 ReplaceNode(Node, SRLI);
1512 return;
1513 }
1514 }
1515 }
1516
1517 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1518 // shifted mask with c2 leading zeros and c3 trailing zeros.
1519 if (!LeftShift && isShiftedMask_64(C1)) {
1520 unsigned Leading = XLen - llvm::bit_width(C1);
1521 unsigned Trailing = llvm::countr_zero(C1);
1522 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1523 !IsCANDI) {
1524 unsigned SrliOpc = RISCV::SRLI;
1525 // If the input is zexti32 we should use SRLIW.
1526 if (X.getOpcode() == ISD::AND &&
1527 isa<ConstantSDNode>(X.getOperand(1)) &&
1528 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1529 SrliOpc = RISCV::SRLIW;
1530 X = X.getOperand(0);
1531 }
1532 SDNode *SRLI = CurDAG->getMachineNode(
1533 SrliOpc, DL, VT, X,
1534 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1535 SDNode *SLLI = CurDAG->getMachineNode(
1536 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1537 CurDAG->getTargetConstant(Trailing, DL, VT));
1538 ReplaceNode(Node, SLLI);
1539 return;
1540 }
1541 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1542 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1543 OneUseOrZExtW && !IsCANDI) {
1544 SDNode *SRLIW = CurDAG->getMachineNode(
1545 RISCV::SRLIW, DL, VT, X,
1546 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1547 SDNode *SLLI = CurDAG->getMachineNode(
1548 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1549 CurDAG->getTargetConstant(Trailing, DL, VT));
1550 ReplaceNode(Node, SLLI);
1551 return;
1552 }
1553 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1554 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1555 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1556 SDNode *SRLI = CurDAG->getMachineNode(
1557 RISCV::SRLI, DL, VT, X,
1558 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1559 SDNode *SLLI_UW = CurDAG->getMachineNode(
1560 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1561 CurDAG->getTargetConstant(Trailing, DL, VT));
1562 ReplaceNode(Node, SLLI_UW);
1563 return;
1564 }
1565 }
1566
1567 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1568 // shifted mask with no leading zeros and c3 trailing zeros.
1569 if (LeftShift && isShiftedMask_64(C1)) {
1570 unsigned Leading = XLen - llvm::bit_width(C1);
1571 unsigned Trailing = llvm::countr_zero(C1);
1572 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1573 SDNode *SRLI = CurDAG->getMachineNode(
1574 RISCV::SRLI, DL, VT, X,
1575 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1576 SDNode *SLLI = CurDAG->getMachineNode(
1577 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1578 CurDAG->getTargetConstant(Trailing, DL, VT));
1579 ReplaceNode(Node, SLLI);
1580 return;
1581 }
1582 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1583 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1584 SDNode *SRLIW = CurDAG->getMachineNode(
1585 RISCV::SRLIW, DL, VT, X,
1586 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1587 SDNode *SLLI = CurDAG->getMachineNode(
1588 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1589 CurDAG->getTargetConstant(Trailing, DL, VT));
1590 ReplaceNode(Node, SLLI);
1591 return;
1592 }
1593
1594 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1595 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1596 Subtarget->hasStdExtZba()) {
1597 SDNode *SRLI = CurDAG->getMachineNode(
1598 RISCV::SRLI, DL, VT, X,
1599 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1600 SDNode *SLLI_UW = CurDAG->getMachineNode(
1601 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1602 CurDAG->getTargetConstant(Trailing, DL, VT));
1603 ReplaceNode(Node, SLLI_UW);
1604 return;
1605 }
1606 }
1607 }
1608
1609 const uint64_t C1 = N1C->getZExtValue();
1610
1611 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1612 N0.hasOneUse()) {
1613 unsigned C2 = N0.getConstantOperandVal(1);
1614 unsigned XLen = Subtarget->getXLen();
1615 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1616
1617 SDValue X = N0.getOperand(0);
1618
1619 // Prefer SRAIW + ANDI when possible.
1620 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1621 X.getOpcode() == ISD::SHL &&
1622 isa<ConstantSDNode>(X.getOperand(1)) &&
1623 X.getConstantOperandVal(1) == 32;
1624 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1625 // mask with c3 leading zeros and c2 is larger than c3.
1626 if (isMask_64(C1) && !Skip) {
1627 unsigned Leading = XLen - llvm::bit_width(C1);
1628 if (C2 > Leading) {
1629 SDNode *SRAI = CurDAG->getMachineNode(
1630 RISCV::SRAI, DL, VT, X,
1631 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1632 SDNode *SRLI = CurDAG->getMachineNode(
1633 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1634 CurDAG->getTargetConstant(Leading, DL, VT));
1635 ReplaceNode(Node, SRLI);
1636 return;
1637 }
1638 }
1639
1640 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1641 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1642 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1643 if (isShiftedMask_64(C1) && !Skip) {
1644 unsigned Leading = XLen - llvm::bit_width(C1);
1645 unsigned Trailing = llvm::countr_zero(C1);
1646 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1647 SDNode *SRAI = CurDAG->getMachineNode(
1648 RISCV::SRAI, DL, VT, N0.getOperand(0),
1649 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1650 SDNode *SRLI = CurDAG->getMachineNode(
1651 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1652 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1653 SDNode *SLLI = CurDAG->getMachineNode(
1654 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1655 CurDAG->getTargetConstant(Trailing, DL, VT));
1656 ReplaceNode(Node, SLLI);
1657 return;
1658 }
1659 }
1660 }
1661
1662 // If C1 masks off the upper bits only (but can't be formed as an
1663 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1664 // available.
1665 // Transform (and x, C1)
1666 // -> (<bfextract> x, msb, lsb)
1667 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1668 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1669 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1670 const unsigned Msb = llvm::bit_width(C1) - 1;
1671 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1672 return;
1673 }
1674
1675 if (tryShrinkShlLogicImm(Node))
1676 return;
1677
1678 break;
1679 }
1680 case ISD::MUL: {
1681 // Special case for calculating (mul (and X, C2), C1) where the full product
1682 // fits in XLen bits. We can shift X left by the number of leading zeros in
1683 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1684 // product has XLen trailing zeros, putting it in the output of MULHU. This
1685 // can avoid materializing a constant in a register for C2.
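// For example, with XLen=64, C2=0xfffff and C1=3: X is shifted left by 44
// (the number of leading zeros in C2), C1 becomes 3 << 20 = 0x300000, and
// (mulhu (slli X, 44), 0x300000) yields (X & 0xfffff) * 3 without ever
// materializing 0xfffff in a register.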
1686
1687 // RHS should be a constant.
1688 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1689 if (!N1C || !N1C->hasOneUse())
1690 break;
1691
1692 // LHS should be an AND with constant.
1693 SDValue N0 = Node->getOperand(0);
1694 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1695 break;
1696
1697 uint64_t C2 = N0.getConstantOperandVal(1);
1698
1699 // Constant should be a mask.
1700 if (!isMask_64(C2))
1701 break;
1702
1703 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1704 // multiple users or the constant is a simm12. This prevents inserting a
1705 // shift while still having uses of the AND/ZEXT. Shifting a simm12 will
1706 // likely make it more costly to materialize. Otherwise, using a SLLI might
1707 // allow it to be compressed.
1708 bool IsANDIOrZExt =
1709 isInt<12>(C2) ||
1710 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1711 // With XTHeadBb, we can use TH.EXTU.
1712 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1713 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1714 break;
1715 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1716 // the constant is a simm32.
1717 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1718 // With XTHeadBb, we can use TH.EXTU.
1719 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1720 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1721 break;
1722
1723 // We need to shift left the AND input and C1 by a total of XLen bits.
1724
1725 // How far left do we need to shift the AND input?
1726 unsigned XLen = Subtarget->getXLen();
1727 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1728
1729 // The constant gets shifted by the remaining amount unless that would
1730 // shift bits out.
1731 uint64_t C1 = N1C->getZExtValue();
1732 unsigned ConstantShift = XLen - LeadingZeros;
1733 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1734 break;
1735
1736 uint64_t ShiftedC1 = C1 << ConstantShift;
1737 // If this is RV32, we need to sign extend the constant.
1738 if (XLen == 32)
1739 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1740
1741 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1742 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1743 SDNode *SLLI =
1744 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1745 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1746 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1747 SDValue(SLLI, 0), SDValue(Imm, 0));
1748 ReplaceNode(Node, MULHU);
1749 return;
1750 }
1751 case ISD::LOAD: {
1752 if (tryIndexedLoad(Node))
1753 return;
1754
1755 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1756 // We match a post-incrementing load here.
1757 LoadSDNode *Load = cast<LoadSDNode>(Node);
1758 if (Load->getAddressingMode() != ISD::POST_INC)
1759 break;
1760
1761 SDValue Chain = Node->getOperand(0);
1762 SDValue Base = Node->getOperand(1);
1763 SDValue Offset = Node->getOperand(2);
1764
1765 bool Simm12 = false;
1766 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1767
1768 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1769 int ConstantVal = ConstantOffset->getSExtValue();
1770 Simm12 = isInt<12>(ConstantVal);
1771 if (Simm12)
1772 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1773 Offset.getValueType());
1774 }
1775
1776 unsigned Opcode = 0;
1777 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1778 case MVT::i8:
1779 if (Simm12 && SignExtend)
1780 Opcode = RISCV::CV_LB_ri_inc;
1781 else if (Simm12 && !SignExtend)
1782 Opcode = RISCV::CV_LBU_ri_inc;
1783 else if (!Simm12 && SignExtend)
1784 Opcode = RISCV::CV_LB_rr_inc;
1785 else
1786 Opcode = RISCV::CV_LBU_rr_inc;
1787 break;
1788 case MVT::i16:
1789 if (Simm12 && SignExtend)
1790 Opcode = RISCV::CV_LH_ri_inc;
1791 else if (Simm12 && !SignExtend)
1792 Opcode = RISCV::CV_LHU_ri_inc;
1793 else if (!Simm12 && SignExtend)
1794 Opcode = RISCV::CV_LH_rr_inc;
1795 else
1796 Opcode = RISCV::CV_LHU_rr_inc;
1797 break;
1798 case MVT::i32:
1799 if (Simm12)
1800 Opcode = RISCV::CV_LW_ri_inc;
1801 else
1802 Opcode = RISCV::CV_LW_rr_inc;
1803 break;
1804 default:
1805 break;
1806 }
1807 if (!Opcode)
1808 break;
1809
1810 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1811 Chain.getSimpleValueType(), Base,
1812 Offset, Chain));
1813 return;
1814 }
1815 break;
1816 }
1817 case RISCVISD::LD_RV32: {
1818 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
1819
1820 SDValue Base, Offset;
1821 SDValue Chain = Node->getOperand(0);
1822 SDValue Addr = Node->getOperand(1);
1823 SelectAddrRegImm(Addr, Base, Offset);
1824
1825 SDValue Ops[] = {Base, Offset, Chain};
1826 MachineSDNode *New = CurDAG->getMachineNode(
1827 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
1828 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1829 MVT::i32, SDValue(New, 0));
1830 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
1831 MVT::i32, SDValue(New, 0));
1832 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1833 ReplaceUses(SDValue(Node, 0), Lo);
1834 ReplaceUses(SDValue(Node, 1), Hi);
1835 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
1836 CurDAG->RemoveDeadNode(Node);
1837 return;
1838 }
1839 case RISCVISD::SD_RV32: {
1840 SDValue Base, Offset;
1841 SDValue Chain = Node->getOperand(0);
1842 SDValue Addr = Node->getOperand(3);
1843 SelectAddrRegImm(Addr, Base, Offset);
1844
1845 SDValue Lo = Node->getOperand(1);
1846 SDValue Hi = Node->getOperand(2);
1847
1848 SDValue RegPair;
1849 // Peephole to use X0_Pair for storing zero.
1850 if (isNullConstant(Lo) && isNullConstant(Hi)) {
1851 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
1852 } else {
1853 SDValue Ops[] = {
1854 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
1855 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
1856 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1857
1858 RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1859 MVT::Untyped, Ops),
1860 0);
1861 }
1862
1863 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
1864 {RegPair, Base, Offset, Chain});
1865 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1866 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
1867 CurDAG->RemoveDeadNode(Node);
1868 return;
1869 }
1870 case ISD::INTRINSIC_WO_CHAIN: {
1871 unsigned IntNo = Node->getConstantOperandVal(0);
1872 switch (IntNo) {
1873 // By default we do not custom select any intrinsic.
1874 default:
1875 break;
1876 case Intrinsic::riscv_vmsgeu:
1877 case Intrinsic::riscv_vmsge: {
1878 SDValue Src1 = Node->getOperand(1);
1879 SDValue Src2 = Node->getOperand(2);
1880 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1881 bool IsCmpConstant = false;
1882 bool IsCmpMinimum = false;
1883 // Only custom select scalar second operand.
1884 if (Src2.getValueType() != XLenVT)
1885 break;
1886 // Small constants are handled with patterns.
1887 int64_t CVal = 0;
1888 MVT Src1VT = Src1.getSimpleValueType();
1889 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1890 IsCmpConstant = true;
1891 CVal = C->getSExtValue();
1892 if (CVal >= -15 && CVal <= 16) {
1893 if (!IsUnsigned || CVal != 0)
1894 break;
1895 IsCmpMinimum = true;
1896 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1897 Src1VT.getScalarSizeInBits())
1898 .getSExtValue()) {
1899 IsCmpMinimum = true;
1900 }
1901 }
1902 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1903 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1904 default:
1905 llvm_unreachable("Unexpected LMUL!");
1906#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1907 case RISCVVType::lmulenum: \
1908 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1909 : RISCV::PseudoVMSLT_VX_##suffix; \
1910 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1911 : RISCV::PseudoVMSGT_VX_##suffix; \
1912 break;
1913 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1914 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1915 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1916 CASE_VMSLT_OPCODES(LMUL_1, M1)
1917 CASE_VMSLT_OPCODES(LMUL_2, M2)
1918 CASE_VMSLT_OPCODES(LMUL_4, M4)
1919 CASE_VMSLT_OPCODES(LMUL_8, M8)
1920#undef CASE_VMSLT_OPCODES
1921 }
1922 // Mask operations use the LMUL from the mask type.
1923 switch (RISCVTargetLowering::getLMUL(VT)) {
1924 default:
1925 llvm_unreachable("Unexpected LMUL!");
1926#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
1927 case RISCVVType::lmulenum: \
1928 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1929 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
1930 break;
1931 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1932 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1933 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1934 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1935 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1936 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1937 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1938#undef CASE_VMNAND_VMSET_OPCODES
1939 }
1940 SDValue SEW = CurDAG->getTargetConstant(
1941 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1942 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1943 SDValue VL;
1944 selectVLOp(Node->getOperand(3), VL);
1945
1946 // If vmsge(u) with minimum value, expand it to vmset.
1947 if (IsCmpMinimum) {
1948 ReplaceNode(Node,
1949 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
1950 return;
1951 }
1952
1953 if (IsCmpConstant) {
1954 SDValue Imm =
1955 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1956
1957 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
1958 {Src1, Imm, VL, SEW}));
1959 return;
1960 }
1961
1962 // Expand to
1963 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1964 SDValue Cmp = SDValue(
1965 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1966 0);
1967 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1968 {Cmp, Cmp, VL, MaskSEW}));
1969 return;
1970 }
1971 case Intrinsic::riscv_vmsgeu_mask:
1972 case Intrinsic::riscv_vmsge_mask: {
1973 SDValue Src1 = Node->getOperand(2);
1974 SDValue Src2 = Node->getOperand(3);
1975 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1976 bool IsCmpConstant = false;
1977 bool IsCmpMinimum = false;
1978 // Only custom select scalar second operand.
1979 if (Src2.getValueType() != XLenVT)
1980 break;
1981 // Small constants are handled with patterns.
1982 MVT Src1VT = Src1.getSimpleValueType();
1983 int64_t CVal = 0;
1984 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1985 IsCmpConstant = true;
1986 CVal = C->getSExtValue();
1987 if (CVal >= -15 && CVal <= 16) {
1988 if (!IsUnsigned || CVal != 0)
1989 break;
1990 IsCmpMinimum = true;
1991 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1992 Src1VT.getScalarSizeInBits())
1993 .getSExtValue()) {
1994 IsCmpMinimum = true;
1995 }
1996 }
1997 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1998 VMOROpcode, VMSGTMaskOpcode;
1999 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2000 default:
2001 llvm_unreachable("Unexpected LMUL!");
2002#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2003 case RISCVVType::lmulenum: \
2004 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2005 : RISCV::PseudoVMSLT_VX_##suffix; \
2006 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2007 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2008 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2009 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2010 break;
2011 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2012 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2013 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2014 CASE_VMSLT_OPCODES(LMUL_1, M1)
2015 CASE_VMSLT_OPCODES(LMUL_2, M2)
2016 CASE_VMSLT_OPCODES(LMUL_4, M4)
2017 CASE_VMSLT_OPCODES(LMUL_8, M8)
2018#undef CASE_VMSLT_OPCODES
2019 }
2020 // Mask operations use the LMUL from the mask type.
2021 switch (RISCVTargetLowering::getLMUL(VT)) {
2022 default:
2023 llvm_unreachable("Unexpected LMUL!");
2024#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2025 case RISCVVType::lmulenum: \
2026 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2027 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2028 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2029 break;
2030 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2031 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2032 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2033 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
2034 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
2035 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
2036 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
2037#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2038 }
2039 SDValue SEW = CurDAG->getTargetConstant(
2040 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2041 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2042 SDValue VL;
2043 selectVLOp(Node->getOperand(5), VL);
2044 SDValue MaskedOff = Node->getOperand(1);
2045 SDValue Mask = Node->getOperand(4);
2046
2047 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2048 if (IsCmpMinimum) {
2049 // We don't need vmor if the MaskedOff and the Mask are the same
2050 // value.
2051 if (Mask == MaskedOff) {
2052 ReplaceUses(Node, Mask.getNode());
2053 return;
2054 }
2055 ReplaceNode(Node,
2056 CurDAG->getMachineNode(VMOROpcode, DL, VT,
2057 {Mask, MaskedOff, VL, MaskSEW}));
2058 return;
2059 }
2060
2061 // If the MaskedOff value and the Mask are the same value use
2062 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2063 // This avoids needing to copy v0 to vd before starting the next sequence.
2064 if (Mask == MaskedOff) {
2065 SDValue Cmp = SDValue(
2066 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2067 0);
2068 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2069 {Mask, Cmp, VL, MaskSEW}));
2070 return;
2071 }
2072
2073 SDValue PolicyOp =
2074 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2075
2076 if (IsCmpConstant) {
2077 SDValue Imm =
2078 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2079
2080 ReplaceNode(Node, CurDAG->getMachineNode(
2081 VMSGTMaskOpcode, DL, VT,
2082 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2083 return;
2084 }
2085
2086 // Otherwise use
2087 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2088 // The result is mask undisturbed.
2089 // We use the same instructions to emulate mask agnostic behavior, because
2090 // the agnostic result can be either undisturbed or all 1.
2091 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2092 {MaskedOff, Src1, Src2, Mask,
2093 VL, SEW, PolicyOp}),
2094 0);
2095 // vmxor.mm vd, vd, v0 is used to update active value.
2096 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2097 {Cmp, Mask, VL, MaskSEW}));
2098 return;
2099 }
2100 case Intrinsic::riscv_vsetvli:
2101 case Intrinsic::riscv_vsetvlimax:
2102 return selectVSETVLI(Node);
2103 case Intrinsic::riscv_sf_vsettnt:
2104 case Intrinsic::riscv_sf_vsettm:
2105 case Intrinsic::riscv_sf_vsettk:
2106 return selectXSfmmVSET(Node);
2107 }
2108 break;
2109 }
2110 case ISD::INTRINSIC_W_CHAIN: {
2111 unsigned IntNo = Node->getConstantOperandVal(1);
2112 switch (IntNo) {
2113 // By default we do not custom select any intrinsic.
2114 default:
2115 break;
2116 case Intrinsic::riscv_vlseg2:
2117 case Intrinsic::riscv_vlseg3:
2118 case Intrinsic::riscv_vlseg4:
2119 case Intrinsic::riscv_vlseg5:
2120 case Intrinsic::riscv_vlseg6:
2121 case Intrinsic::riscv_vlseg7:
2122 case Intrinsic::riscv_vlseg8: {
2123 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2124 /*IsStrided*/ false);
2125 return;
2126 }
2127 case Intrinsic::riscv_vlseg2_mask:
2128 case Intrinsic::riscv_vlseg3_mask:
2129 case Intrinsic::riscv_vlseg4_mask:
2130 case Intrinsic::riscv_vlseg5_mask:
2131 case Intrinsic::riscv_vlseg6_mask:
2132 case Intrinsic::riscv_vlseg7_mask:
2133 case Intrinsic::riscv_vlseg8_mask: {
2134 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2135 /*IsStrided*/ false);
2136 return;
2137 }
2138 case Intrinsic::riscv_vlsseg2:
2139 case Intrinsic::riscv_vlsseg3:
2140 case Intrinsic::riscv_vlsseg4:
2141 case Intrinsic::riscv_vlsseg5:
2142 case Intrinsic::riscv_vlsseg6:
2143 case Intrinsic::riscv_vlsseg7:
2144 case Intrinsic::riscv_vlsseg8: {
2145 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2146 /*IsStrided*/ true);
2147 return;
2148 }
2149 case Intrinsic::riscv_vlsseg2_mask:
2150 case Intrinsic::riscv_vlsseg3_mask:
2151 case Intrinsic::riscv_vlsseg4_mask:
2152 case Intrinsic::riscv_vlsseg5_mask:
2153 case Intrinsic::riscv_vlsseg6_mask:
2154 case Intrinsic::riscv_vlsseg7_mask:
2155 case Intrinsic::riscv_vlsseg8_mask: {
2156 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2157 /*IsStrided*/ true);
2158 return;
2159 }
2160 case Intrinsic::riscv_vloxseg2:
2161 case Intrinsic::riscv_vloxseg3:
2162 case Intrinsic::riscv_vloxseg4:
2163 case Intrinsic::riscv_vloxseg5:
2164 case Intrinsic::riscv_vloxseg6:
2165 case Intrinsic::riscv_vloxseg7:
2166 case Intrinsic::riscv_vloxseg8:
2167 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2168 /*IsOrdered*/ true);
2169 return;
2170 case Intrinsic::riscv_vluxseg2:
2171 case Intrinsic::riscv_vluxseg3:
2172 case Intrinsic::riscv_vluxseg4:
2173 case Intrinsic::riscv_vluxseg5:
2174 case Intrinsic::riscv_vluxseg6:
2175 case Intrinsic::riscv_vluxseg7:
2176 case Intrinsic::riscv_vluxseg8:
2177 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2178 /*IsOrdered*/ false);
2179 return;
2180 case Intrinsic::riscv_vloxseg2_mask:
2181 case Intrinsic::riscv_vloxseg3_mask:
2182 case Intrinsic::riscv_vloxseg4_mask:
2183 case Intrinsic::riscv_vloxseg5_mask:
2184 case Intrinsic::riscv_vloxseg6_mask:
2185 case Intrinsic::riscv_vloxseg7_mask:
2186 case Intrinsic::riscv_vloxseg8_mask:
2187 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2188 /*IsOrdered*/ true);
2189 return;
2190 case Intrinsic::riscv_vluxseg2_mask:
2191 case Intrinsic::riscv_vluxseg3_mask:
2192 case Intrinsic::riscv_vluxseg4_mask:
2193 case Intrinsic::riscv_vluxseg5_mask:
2194 case Intrinsic::riscv_vluxseg6_mask:
2195 case Intrinsic::riscv_vluxseg7_mask:
2196 case Intrinsic::riscv_vluxseg8_mask:
2197 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2198 /*IsOrdered*/ false);
2199 return;
2200 case Intrinsic::riscv_vlseg8ff:
2201 case Intrinsic::riscv_vlseg7ff:
2202 case Intrinsic::riscv_vlseg6ff:
2203 case Intrinsic::riscv_vlseg5ff:
2204 case Intrinsic::riscv_vlseg4ff:
2205 case Intrinsic::riscv_vlseg3ff:
2206 case Intrinsic::riscv_vlseg2ff: {
2207 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2208 return;
2209 }
2210 case Intrinsic::riscv_vlseg8ff_mask:
2211 case Intrinsic::riscv_vlseg7ff_mask:
2212 case Intrinsic::riscv_vlseg6ff_mask:
2213 case Intrinsic::riscv_vlseg5ff_mask:
2214 case Intrinsic::riscv_vlseg4ff_mask:
2215 case Intrinsic::riscv_vlseg3ff_mask:
2216 case Intrinsic::riscv_vlseg2ff_mask: {
2217 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2218 return;
2219 }
2220 case Intrinsic::riscv_vloxei:
2221 case Intrinsic::riscv_vloxei_mask:
2222 case Intrinsic::riscv_vluxei:
2223 case Intrinsic::riscv_vluxei_mask: {
2224 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2225 IntNo == Intrinsic::riscv_vluxei_mask;
2226 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2227 IntNo == Intrinsic::riscv_vloxei_mask;
2228
2229 MVT VT = Node->getSimpleValueType(0);
2230 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2231
2232 unsigned CurOp = 2;
2233 SmallVector<SDValue, 8> Operands;
2234 Operands.push_back(Node->getOperand(CurOp++));
2235
2236 MVT IndexVT;
2237 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2238 /*IsStridedOrIndexed*/ true, Operands,
2239 /*IsLoad=*/true, &IndexVT);
2240
2241 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2242 "Element count mismatch");
2243
2244 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2245 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2246 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2247 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2248 reportFatalUsageError("The V extension does not support EEW=64 for "
2249 "index values when XLEN=32");
2250 }
2251 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2252 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2253 static_cast<unsigned>(IndexLMUL));
2254 MachineSDNode *Load =
2255 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2256
2257 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2258
2259 ReplaceNode(Node, Load);
2260 return;
2261 }
2262 case Intrinsic::riscv_vlm:
2263 case Intrinsic::riscv_vle:
2264 case Intrinsic::riscv_vle_mask:
2265 case Intrinsic::riscv_vlse:
2266 case Intrinsic::riscv_vlse_mask: {
2267 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2268 IntNo == Intrinsic::riscv_vlse_mask;
2269 bool IsStrided =
2270 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2271
2272 MVT VT = Node->getSimpleValueType(0);
2273 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2274
2275 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2276 // operand at the IR level. In pseudos, it has both a policy and a
2277 // passthru operand. The passthru operand is needed to track the
2278 // "tail undefined" state, and the policy is there just for
2279 // consistency - it will always be "don't care" for the
2280 // unmasked form.
2281 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2282 unsigned CurOp = 2;
2283 SmallVector<SDValue, 8> Operands;
2284 if (HasPassthruOperand)
2285 Operands.push_back(Node->getOperand(CurOp++));
2286 else {
2287 // We eagerly lower to implicit_def (instead of undef), as we
2288 // otherwise fail to select nodes such as: nxv1i1 = undef
2289 SDNode *Passthru =
2290 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2291 Operands.push_back(SDValue(Passthru, 0));
2292 }
2293 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2294 Operands, /*IsLoad=*/true);
2295
2296 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2297 const RISCV::VLEPseudo *P =
2298 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2299 static_cast<unsigned>(LMUL));
2300 MachineSDNode *Load =
2301 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2302
2303 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2304
2305 ReplaceNode(Node, Load);
2306 return;
2307 }
2308 case Intrinsic::riscv_vleff:
2309 case Intrinsic::riscv_vleff_mask: {
2310 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2311
2312 MVT VT = Node->getSimpleValueType(0);
2313 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2314
2315 unsigned CurOp = 2;
2316 SmallVector<SDValue, 7> Operands;
2317 Operands.push_back(Node->getOperand(CurOp++));
2318 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2319 /*IsStridedOrIndexed*/ false, Operands,
2320 /*IsLoad=*/true);
2321
2322 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2323 const RISCV::VLEPseudo *P =
2324 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2325 Log2SEW, static_cast<unsigned>(LMUL));
2326 MachineSDNode *Load = CurDAG->getMachineNode(
2327 P->Pseudo, DL, Node->getVTList(), Operands);
2328 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2329
2330 ReplaceNode(Node, Load);
2331 return;
2332 }
2333 case Intrinsic::riscv_nds_vln:
2334 case Intrinsic::riscv_nds_vln_mask:
2335 case Intrinsic::riscv_nds_vlnu:
2336 case Intrinsic::riscv_nds_vlnu_mask: {
2337 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2338 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2339 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2340 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2341
2342 MVT VT = Node->getSimpleValueType(0);
2343 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2344 unsigned CurOp = 2;
2345 SmallVector<SDValue, 8> Operands;
2346
2347 Operands.push_back(Node->getOperand(CurOp++));
2348 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2349 /*IsStridedOrIndexed=*/false, Operands,
2350 /*IsLoad=*/true);
2351
2352 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2353 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2354 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2355 MachineSDNode *Load =
2356 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2357
2358 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2359 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2360
2361 ReplaceNode(Node, Load);
2362 return;
2363 }
2364 }
2365 break;
2366 }
2367 case ISD::INTRINSIC_VOID: {
2368 unsigned IntNo = Node->getConstantOperandVal(1);
2369 switch (IntNo) {
2370 case Intrinsic::riscv_vsseg2:
2371 case Intrinsic::riscv_vsseg3:
2372 case Intrinsic::riscv_vsseg4:
2373 case Intrinsic::riscv_vsseg5:
2374 case Intrinsic::riscv_vsseg6:
2375 case Intrinsic::riscv_vsseg7:
2376 case Intrinsic::riscv_vsseg8: {
2377 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2378 /*IsStrided*/ false);
2379 return;
2380 }
2381 case Intrinsic::riscv_vsseg2_mask:
2382 case Intrinsic::riscv_vsseg3_mask:
2383 case Intrinsic::riscv_vsseg4_mask:
2384 case Intrinsic::riscv_vsseg5_mask:
2385 case Intrinsic::riscv_vsseg6_mask:
2386 case Intrinsic::riscv_vsseg7_mask:
2387 case Intrinsic::riscv_vsseg8_mask: {
2388 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2389 /*IsStrided*/ false);
2390 return;
2391 }
2392 case Intrinsic::riscv_vssseg2:
2393 case Intrinsic::riscv_vssseg3:
2394 case Intrinsic::riscv_vssseg4:
2395 case Intrinsic::riscv_vssseg5:
2396 case Intrinsic::riscv_vssseg6:
2397 case Intrinsic::riscv_vssseg7:
2398 case Intrinsic::riscv_vssseg8: {
2399 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2400 /*IsStrided*/ true);
2401 return;
2402 }
2403 case Intrinsic::riscv_vssseg2_mask:
2404 case Intrinsic::riscv_vssseg3_mask:
2405 case Intrinsic::riscv_vssseg4_mask:
2406 case Intrinsic::riscv_vssseg5_mask:
2407 case Intrinsic::riscv_vssseg6_mask:
2408 case Intrinsic::riscv_vssseg7_mask:
2409 case Intrinsic::riscv_vssseg8_mask: {
2410 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2411 /*IsStrided*/ true);
2412 return;
2413 }
2414 case Intrinsic::riscv_vsoxseg2:
2415 case Intrinsic::riscv_vsoxseg3:
2416 case Intrinsic::riscv_vsoxseg4:
2417 case Intrinsic::riscv_vsoxseg5:
2418 case Intrinsic::riscv_vsoxseg6:
2419 case Intrinsic::riscv_vsoxseg7:
2420 case Intrinsic::riscv_vsoxseg8:
2421 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2422 /*IsOrdered*/ true);
2423 return;
2424 case Intrinsic::riscv_vsuxseg2:
2425 case Intrinsic::riscv_vsuxseg3:
2426 case Intrinsic::riscv_vsuxseg4:
2427 case Intrinsic::riscv_vsuxseg5:
2428 case Intrinsic::riscv_vsuxseg6:
2429 case Intrinsic::riscv_vsuxseg7:
2430 case Intrinsic::riscv_vsuxseg8:
2431 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2432 /*IsOrdered*/ false);
2433 return;
2434 case Intrinsic::riscv_vsoxseg2_mask:
2435 case Intrinsic::riscv_vsoxseg3_mask:
2436 case Intrinsic::riscv_vsoxseg4_mask:
2437 case Intrinsic::riscv_vsoxseg5_mask:
2438 case Intrinsic::riscv_vsoxseg6_mask:
2439 case Intrinsic::riscv_vsoxseg7_mask:
2440 case Intrinsic::riscv_vsoxseg8_mask:
2441 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2442 /*IsOrdered*/ true);
2443 return;
2444 case Intrinsic::riscv_vsuxseg2_mask:
2445 case Intrinsic::riscv_vsuxseg3_mask:
2446 case Intrinsic::riscv_vsuxseg4_mask:
2447 case Intrinsic::riscv_vsuxseg5_mask:
2448 case Intrinsic::riscv_vsuxseg6_mask:
2449 case Intrinsic::riscv_vsuxseg7_mask:
2450 case Intrinsic::riscv_vsuxseg8_mask:
2451 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2452 /*IsOrdered*/ false);
2453 return;
2454 case Intrinsic::riscv_vsoxei:
2455 case Intrinsic::riscv_vsoxei_mask:
2456 case Intrinsic::riscv_vsuxei:
2457 case Intrinsic::riscv_vsuxei_mask: {
2458 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2459 IntNo == Intrinsic::riscv_vsuxei_mask;
2460 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2461 IntNo == Intrinsic::riscv_vsoxei_mask;
2462
2463 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2464 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2465
2466 unsigned CurOp = 2;
2467 SmallVector<SDValue, 8> Operands;
2468 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2469
2470 MVT IndexVT;
2471 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2472 /*IsStridedOrIndexed*/ true, Operands,
2473 /*IsLoad=*/false, &IndexVT);
2474
2475 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2476 "Element count mismatch");
2477
2478 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2479 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2480 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2481 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2482 reportFatalUsageError("The V extension does not support EEW=64 for "
2483 "index values when XLEN=32");
2484 }
2485 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2486 IsMasked, IsOrdered, IndexLog2EEW,
2487 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2488 MachineSDNode *Store =
2489 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2490
2491 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2492
2493 ReplaceNode(Node, Store);
2494 return;
2495 }
2496 case Intrinsic::riscv_vsm:
2497 case Intrinsic::riscv_vse:
2498 case Intrinsic::riscv_vse_mask:
2499 case Intrinsic::riscv_vsse:
2500 case Intrinsic::riscv_vsse_mask: {
2501 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2502 IntNo == Intrinsic::riscv_vsse_mask;
2503 bool IsStrided =
2504 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2505
2506 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2507 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2508
2509 unsigned CurOp = 2;
2510 SmallVector<SDValue, 8> Operands;
2511 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2512
2513 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2514 Operands);
2515
2516 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2517 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2518 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2519 MachineSDNode *Store =
2520 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2521 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2522
2523 ReplaceNode(Node, Store);
2524 return;
2525 }
2526 case Intrinsic::riscv_sf_vc_x_se:
2527 case Intrinsic::riscv_sf_vc_i_se:
2528 selectSF_VC_X_SE(Node);
2529 return;
2530 case Intrinsic::riscv_sf_vlte8:
2531 case Intrinsic::riscv_sf_vlte16:
2532 case Intrinsic::riscv_sf_vlte32:
2533 case Intrinsic::riscv_sf_vlte64: {
2534 unsigned Log2SEW;
2535 unsigned PseudoInst;
2536 switch (IntNo) {
2537 case Intrinsic::riscv_sf_vlte8:
2538 PseudoInst = RISCV::PseudoSF_VLTE8;
2539 Log2SEW = 3;
2540 break;
2541 case Intrinsic::riscv_sf_vlte16:
2542 PseudoInst = RISCV::PseudoSF_VLTE16;
2543 Log2SEW = 4;
2544 break;
2545 case Intrinsic::riscv_sf_vlte32:
2546 PseudoInst = RISCV::PseudoSF_VLTE32;
2547 Log2SEW = 5;
2548 break;
2549 case Intrinsic::riscv_sf_vlte64:
2550 PseudoInst = RISCV::PseudoSF_VLTE64;
2551 Log2SEW = 6;
2552 break;
2553 }
2554
2555 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2556 SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT);
2557 SDValue Operands[] = {Node->getOperand(2),
2558 Node->getOperand(3),
2559 Node->getOperand(4),
2560 SEWOp,
2561 TWidenOp,
2562 Node->getOperand(0)};
2563
2564 MachineSDNode *TileLoad =
2565 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2566 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2567 CurDAG->setNodeMemRefs(TileLoad, {MemOp->getMemOperand()});
2568
2569 ReplaceNode(Node, TileLoad);
2570 return;
2571 }
2572 case Intrinsic::riscv_sf_mm_s_s:
2573 case Intrinsic::riscv_sf_mm_s_u:
2574 case Intrinsic::riscv_sf_mm_u_s:
2575 case Intrinsic::riscv_sf_mm_u_u:
2576 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2577 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2578 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2579 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2580 case Intrinsic::riscv_sf_mm_f_f: {
2581 bool HasFRM = false;
2582 unsigned PseudoInst;
2583 switch (IntNo) {
2584 case Intrinsic::riscv_sf_mm_s_s:
2585 PseudoInst = RISCV::PseudoSF_MM_S_S;
2586 break;
2587 case Intrinsic::riscv_sf_mm_s_u:
2588 PseudoInst = RISCV::PseudoSF_MM_S_U;
2589 break;
2590 case Intrinsic::riscv_sf_mm_u_s:
2591 PseudoInst = RISCV::PseudoSF_MM_U_S;
2592 break;
2593 case Intrinsic::riscv_sf_mm_u_u:
2594 PseudoInst = RISCV::PseudoSF_MM_U_U;
2595 break;
2596 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2597 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2598 HasFRM = true;
2599 break;
2600 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2601 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2602 HasFRM = true;
2603 break;
2604 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2605 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2606 HasFRM = true;
2607 break;
2608 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2609 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2610 HasFRM = true;
2611 break;
2612 case Intrinsic::riscv_sf_mm_f_f:
2613 if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16)
2614 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2615 else
2616 PseudoInst = RISCV::PseudoSF_MM_F_F;
2617 HasFRM = true;
2618 break;
2619 }
2620 uint64_t TileNum = Node->getConstantOperandVal(2);
2621 SDValue Op1 = Node->getOperand(3);
2622 SDValue Op2 = Node->getOperand(4);
2623 MVT VT = Op1->getSimpleValueType(0);
2624 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2625 SDValue TmOp = Node->getOperand(5);
2626 SDValue TnOp = Node->getOperand(6);
2627 SDValue TkOp = Node->getOperand(7);
2628 SDValue TWidenOp = Node->getOperand(8);
2629 SDValue Chain = Node->getOperand(0);
2630
2631 // sf.mm.f.f with sew=32, twiden=2 is invalid
2632 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2633 TWidenOp->getAsZExtVal() == 2)
2634 reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)");
2635
2636 SmallVector<SDValue, 10> Operands(
2637 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2});
2638 if (HasFRM)
2639 Operands.push_back(
2640 CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT));
2641 Operands.append({TmOp, TnOp, TkOp,
2642 CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp,
2643 Chain});
2644
2645 auto *NewNode =
2646 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2647
2648 ReplaceNode(Node, NewNode);
2649 return;
2650 }
2651 case Intrinsic::riscv_sf_vtzero_t: {
2652 uint64_t TileNum = Node->getConstantOperandVal(2);
2653 SDValue Tm = Node->getOperand(3);
2654 SDValue Tn = Node->getOperand(4);
2655 SDValue Log2SEW = Node->getOperand(5);
2656 SDValue TWiden = Node->getOperand(6);
2657 SDValue Chain = Node->getOperand(0);
2658 auto *NewNode = CurDAG->getMachineNode(
2659 RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(),
2660 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW,
2661 TWiden, Chain});
2662
2663 ReplaceNode(Node, NewNode);
2664 return;
2665 }
2666 }
2667 break;
2668 }
2669 case ISD::BITCAST: {
2670 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2671 // Just drop bitcasts between vectors if both are fixed or both are
2672 // scalable.
2673 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2674 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2675 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2676 CurDAG->RemoveDeadNode(Node);
2677 return;
2678 }
2679 if (Subtarget->enablePExtCodeGen()) {
2680 bool Is32BitCast =
2681 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2682 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2683 bool Is64BitCast =
2684 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2685 SrcVT == MVT::v2i32)) ||
2686 (SrcVT == MVT::i64 &&
2687 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2688 if (Is32BitCast || Is64BitCast) {
2689 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2690 CurDAG->RemoveDeadNode(Node);
2691 return;
2692 }
2693 }
2694 break;
2695 }
2697 if (Subtarget->enablePExtCodeGen()) {
2698 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2699 if (VT == MVT::v2i32 && SrcVT == MVT::i64) {
2700 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2701 CurDAG->RemoveDeadNode(Node);
2702 return;
2703 }
2704 }
2705 break;
2706 case ISD::INSERT_SUBVECTOR:
2707 case RISCVISD::TUPLE_INSERT: {
2708 SDValue V = Node->getOperand(0);
2709 SDValue SubV = Node->getOperand(1);
2710 SDLoc DL(SubV);
2711 auto Idx = Node->getConstantOperandVal(2);
2712 MVT SubVecVT = SubV.getSimpleValueType();
2713
2714 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2715 MVT SubVecContainerVT = SubVecVT;
2716 // Establish the correct scalable-vector types for any fixed-length type.
2717 if (SubVecVT.isFixedLengthVector()) {
2718 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2719 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2720 [[maybe_unused]] bool ExactlyVecRegSized =
2721 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2722 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2723 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2724 .getKnownMinValue()));
2725 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2726 }
2727 MVT ContainerVT = VT;
2728 if (VT.isFixedLengthVector())
2729 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2730
2731 const auto *TRI = Subtarget->getRegisterInfo();
2732 unsigned SubRegIdx;
2733 std::tie(SubRegIdx, Idx) =
2734 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2735 ContainerVT, SubVecContainerVT, Idx, TRI);
2736
2737 // If the Idx hasn't been completely eliminated then this is a subvector
2738 // insert which doesn't naturally align to a vector register. These must
2739 // be handled using instructions to manipulate the vector registers.
2740 if (Idx != 0)
2741 break;
2742
2743 RISCVVType::VLMUL SubVecLMUL =
2744 RISCVTargetLowering::getLMUL(SubVecContainerVT);
2745 [[maybe_unused]] bool IsSubVecPartReg =
2746 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
2747 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
2748 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
2749 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2750 V.isUndef()) &&
2751 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2752 "the subvector is smaller than a full-sized register");
2753
2754 // If we haven't set a SubRegIdx, then we must be going between
2755 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2756 if (SubRegIdx == RISCV::NoSubRegister) {
2757 unsigned InRegClassID =
2758 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2759 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2760 InRegClassID &&
2761 "Unexpected subvector extraction");
2762 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2763 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2764 DL, VT, SubV, RC);
2765 ReplaceNode(Node, NewNode);
2766 return;
2767 }
2768
2769 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2770 ReplaceNode(Node, Insert.getNode());
2771 return;
2772 }
2773 case ISD::EXTRACT_SUBVECTOR:
2774 case RISCVISD::TUPLE_EXTRACT: {
2775 SDValue V = Node->getOperand(0);
2776 auto Idx = Node->getConstantOperandVal(1);
2777 MVT InVT = V.getSimpleValueType();
2778 SDLoc DL(V);
2779
2780 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2781 MVT SubVecContainerVT = VT;
2782 // Establish the correct scalable-vector types for any fixed-length type.
2783 if (VT.isFixedLengthVector()) {
2784 assert(Idx == 0);
2785 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2786 }
2787 if (InVT.isFixedLengthVector())
2788 InVT = TLI.getContainerForFixedLengthVector(InVT);
2789
2790 const auto *TRI = Subtarget->getRegisterInfo();
2791 unsigned SubRegIdx;
2792 std::tie(SubRegIdx, Idx) =
2793 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2794 InVT, SubVecContainerVT, Idx, TRI);
2795
2796 // If the Idx hasn't been completely eliminated then this is a subvector
2797 // extract which doesn't naturally align to a vector register. These must
2798 // be handled using instructions to manipulate the vector registers.
2799 if (Idx != 0)
2800 break;
2801
2802 // If we haven't set a SubRegIdx, then we must be going between
2803 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2804 if (SubRegIdx == RISCV::NoSubRegister) {
2805 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2806 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2807 InRegClassID &&
2808 "Unexpected subvector extraction");
2809 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2810 SDNode *NewNode =
2811 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2812 ReplaceNode(Node, NewNode);
2813 return;
2814 }
2815
2816 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2817 ReplaceNode(Node, Extract.getNode());
2818 return;
2819 }
2820 case RISCVISD::VMV_S_X_VL:
2821 case RISCVISD::VFMV_S_F_VL:
2822 case RISCVISD::VMV_V_X_VL:
2823 case RISCVISD::VFMV_V_F_VL: {
2824 // Try to match splat of a scalar load to a strided load with stride of x0.
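// For example, (vmv.v.x (load p)) can become a single vlse with rs2=x0
// (zero stride), and the VL=1 scalar-move form can use a plain unit-stride
// load, so the value never passes through a scalar register.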
2825 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2826 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2827 if (!Node->getOperand(0).isUndef())
2828 break;
2829 SDValue Src = Node->getOperand(1);
2830 auto *Ld = dyn_cast<LoadSDNode>(Src);
2831 // Can't fold an indexed (load update) node because its second
2832 // output is used, so the node can't be removed.
2833 if (!Ld || Ld->isIndexed())
2834 break;
2835 EVT MemVT = Ld->getMemoryVT();
2836 // The memory VT should be the same size as the element type.
2837 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2838 break;
2839 if (!IsProfitableToFold(Src, Node, Node) ||
2840 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2841 break;
2842
2843 SDValue VL;
2844 if (IsScalarMove) {
2845 // We could deal with more VL if we update the VSETVLI insert pass to
2846 // avoid introducing more VSETVLI.
2847 if (!isOneConstant(Node->getOperand(2)))
2848 break;
2849 selectVLOp(Node->getOperand(2), VL);
2850 } else
2851 selectVLOp(Node->getOperand(2), VL);
2852
2853 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2854 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2855
2856 // If VL=1, then we don't need to do a strided load and can just do a
2857 // regular load.
2858 bool IsStrided = !isOneConstant(VL);
2859
2860 // Only do a strided load if we have optimized zero-stride vector load.
2861 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2862 break;
2863
2864 SmallVector<SDValue> Operands = {
2865 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2866 Ld->getBasePtr()};
2867 if (IsStrided)
2868 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2869 uint64_t Policy = RISCVVType::MASK_AGNOSTIC | RISCVVType::TAIL_AGNOSTIC;
2870 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2871 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2872
2873 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2874 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2875 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2876 Log2SEW, static_cast<unsigned>(LMUL));
2877 MachineSDNode *Load =
2878 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2879 // Update the chain.
2880 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2881 // Record the mem-refs
2882 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2883 // Replace the splat with the vlse.
2884 ReplaceNode(Node, Load);
2885 return;
2886 }
2887 case ISD::PREFETCH:
2888 unsigned Locality = Node->getConstantOperandVal(3);
2889 if (Locality > 2)
2890 break;
2891
2892 auto *LoadStoreMem = cast<MemSDNode>(Node);
2893 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2894 MMO->setFlags(MachineMemOperand::MONontemporal);
2895
2896 int NontemporalLevel = 0;
2897 switch (Locality) {
2898 case 0:
2899 NontemporalLevel = 3; // NTL.ALL
2900 break;
2901 case 1:
2902 NontemporalLevel = 1; // NTL.PALL
2903 break;
2904 case 2:
2905 NontemporalLevel = 0; // NTL.P1
2906 break;
2907 default:
2908 llvm_unreachable("unexpected locality value.");
2909 }
2910
2911 if (NontemporalLevel & 0b1)
2912 MMO->setFlags(MONontemporalBit0);
2913 if (NontemporalLevel & 0b10)
2914 MMO->setFlags(MONontemporalBit1);
2915 break;
2916 }
2917
2918 // Select the default instruction.
2919 SelectCode(Node);
2920}
2921
2922 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2923 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2924 std::vector<SDValue> &OutOps) {
2925 // Always produce a register and immediate operand, as expected by
2926 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2927 switch (ConstraintID) {
2928 case InlineAsm::ConstraintCode::o:
2929 case InlineAsm::ConstraintCode::m: {
2930 SDValue Op0, Op1;
2931 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2932 assert(Found && "SelectAddrRegImm should always succeed");
2933 OutOps.push_back(Op0);
2934 OutOps.push_back(Op1);
2935 return false;
2936 }
2937 case InlineAsm::ConstraintCode::A:
2938 OutOps.push_back(Op);
2939 OutOps.push_back(
2940 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2941 return false;
2942 default:
2943 report_fatal_error("Unexpected asm memory constraint " +
2944 InlineAsm::getMemConstraintName(ConstraintID));
2945 }
2946
2947 return true;
2948}
2949
2950 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2951 SDValue &Offset) {
2952 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2953 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2954 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2955 return true;
2956 }
2957
2958 return false;
2959}
2960
2961// Fold constant addresses.
2962static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2963 const MVT VT, const RISCVSubtarget *Subtarget,
2964 SDValue Addr, SDValue &Base, SDValue &Offset,
2965 bool IsPrefetch = false) {
2966 if (!isa<ConstantSDNode>(Addr))
2967 return false;
2968
2969 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2970
2971 // If the constant is a simm12, we can fold the whole constant and use X0 as
2972 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2973 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
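// For example, CVal = 0x1800 gives Lo12 = signext12(0x800) = -2048 and
// Hi = 0x1800 - (-2048) = 0x2000, so the base is (LUI 2) and the memory
// offset is -2048, which adds back up to 0x1800.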
2974 int64_t Lo12 = SignExtend64<12>(CVal);
2975 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2976 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2977 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2978 return false;
2979 if (Hi) {
2980 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2981 Base = SDValue(
2982 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2983 CurDAG->getTargetConstant(Hi20, DL, VT)),
2984 0);
2985 } else {
2986 Base = CurDAG->getRegister(RISCV::X0, VT);
2987 }
2988 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2989 return true;
2990 }
2991
2992 // Ask how constant materialization would handle this constant.
2993 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2994
2995 // If the last instruction would be an ADDI, we can fold its immediate and
2996 // emit the rest of the sequence as the base.
2997 if (Seq.back().getOpcode() != RISCV::ADDI)
2998 return false;
2999 Lo12 = Seq.back().getImm();
3000 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3001 return false;
3002
3003 // Drop the last instruction.
3004 Seq.pop_back();
3005 assert(!Seq.empty() && "Expected more instructions in sequence");
3006
3007 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3008 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3009 return true;
3010}
3011
3012// Is this ADD instruction only used as the base pointer of scalar loads and
3013// stores?
3014 static bool isWorthFoldingAdd(SDValue Add) {
3015 for (auto *User : Add->users()) {
3016 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3017 User->getOpcode() != RISCVISD::LD_RV32 &&
3018 User->getOpcode() != RISCVISD::SD_RV32 &&
3019 User->getOpcode() != ISD::ATOMIC_LOAD &&
3020 User->getOpcode() != ISD::ATOMIC_STORE)
3021 return false;
3022 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3023 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3024 VT != MVT::f64)
3025 return false;
3026 // Don't allow stores of the value. It must be used as the address.
3027 if (User->getOpcode() == ISD::STORE &&
3028 cast<StoreSDNode>(User)->getValue() == Add)
3029 return false;
3030 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3031 cast<AtomicSDNode>(User)->getVal() == Add)
3032 return false;
3033 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3034 (User->getOperand(0) == Add || User->getOperand(1) == Add))
3035 return false;
3036 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
3037 return false;
3038 }
3039
3040 return true;
3041}
3042
3043 static bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
3044 switch (User->getOpcode()) {
3045 default:
3046 return false;
3047 case ISD::LOAD:
3048 case RISCVISD::LD_RV32:
3049 case ISD::ATOMIC_LOAD:
3050 break;
3051 case ISD::STORE:
3052 // Don't allow stores of Add. It must only be used as the address.
3053 if (cast<StoreSDNode>(User)->getValue() == Add)
3054 return false;
3055 break;
3056 case RISCVISD::SD_RV32:
3057 // Don't allow stores of Add. It must only be used as the address.
3058 if (User->getOperand(0) == Add || User->getOperand(1) == Add)
3059 return false;
3060 break;
3061 case ISD::ATOMIC_STORE:
3062 // Don't allow stores of Add. It must only be used as the address.
3063 if (cast<AtomicSDNode>(User)->getVal() == Add)
3064 return false;
3065 break;
3066 }
3067
3068 return true;
3069}
3070
3071// To prevent SelectAddrRegImm from folding offsets that conflict with the
3072// fusion of PseudoMovAddr, check if the offset of every use of a given address
3073// is within the alignment.
3074 bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr,
3075 Align Alignment) {
3076 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3077 for (auto *User : Addr->users()) {
3078 // If the user is a load or store, then the offset is 0 which is always
3079 // within alignment.
3080 if (isRegImmLoadOrStore(User, Addr))
3081 continue;
3082
3083 if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
3084 int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3085 if (!isInt<12>(CVal) || Alignment <= CVal)
3086 return false;
3087
3088 // Make sure all uses are foldable load/stores.
3089 for (auto *AddUser : User->users())
3090 if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
3091 return false;
3092
3093 continue;
3094 }
3095
3096 return false;
3097 }
3098
3099 return true;
3100}
3101
3102 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
3103 SDValue &Offset) {
3104 if (SelectAddrFrameIndex(Addr, Base, Offset))
3105 return true;
3106
3107 SDLoc DL(Addr);
3108 MVT VT = Addr.getSimpleValueType();
3109
3110 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3111 bool CanFold = true;
3112 // Unconditionally fold if operand 1 is not a global address (e.g.
3113 // an external symbol)
3114 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
3115 const DataLayout &DL = CurDAG->getDataLayout();
3116 Align Alignment = commonAlignment(
3117 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3118 if (!areOffsetsWithinAlignment(Addr, Alignment))
3119 CanFold = false;
3120 }
3121 if (CanFold) {
3122 Base = Addr.getOperand(0);
3123 Offset = Addr.getOperand(1);
3124 return true;
3125 }
3126 }
3127
3128 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3129 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3130 if (isInt<12>(CVal)) {
3131 Base = Addr.getOperand(0);
3132 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3133 SDValue LoOperand = Base.getOperand(1);
3134 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
3135 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3136 // (its low part, really), then we can rely on the alignment of that
3137 // variable to provide a margin of safety before the low part can overflow
3138 // the 12 bits of the load/store offset. Check if CVal falls within
3139 // that margin; if so (low part + CVal) can't overflow.
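// For example, if the global is 16-byte aligned, its low 12-bit part is a
// multiple of 16 and thus at most 2032, so adding any CVal below 16 cannot
// push the combined offset past 2047.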
3140 const DataLayout &DL = CurDAG->getDataLayout();
3141 Align Alignment = commonAlignment(
3142 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3143 if ((CVal == 0 || Alignment > CVal) &&
3144 areOffsetsWithinAlignment(Base, Alignment)) {
3145 int64_t CombinedOffset = CVal + GA->getOffset();
3146 Base = Base.getOperand(0);
3147 Offset = CurDAG->getTargetGlobalAddress(
3148 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
3149 CombinedOffset, GA->getTargetFlags());
3150 return true;
3151 }
3152 }
3153 }
3154
3155 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3156 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3157 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3158 return true;
3159 }
3160 }
3161
3162 // Handle ADD with large immediates.
3163 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3164 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3165 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3166
3167 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3168 // an ADDI for part of the offset and fold the rest into the load/store.
3169 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
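// For example, an offset of 3000 becomes (ADDI base, 2047) with the
// remaining 953 folded into the load/store immediate; both values are
// valid simm12.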
3170 if (CVal >= -4096 && CVal <= 4094) {
3171 int64_t Adj = CVal < 0 ? -2048 : 2047;
3172 Base = SDValue(
3173 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3174 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3175 0);
3176 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3177 return true;
3178 }
3179
3180 // For larger immediates, we might be able to save one instruction from
3181 // constant materialization by folding the Lo12 bits of the immediate into
3182 // the address. We should only do this if the ADD is only used by loads and
3183 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3184 // separately with the full materialized immediate creating extra
3185 // instructions.
3186 if (isWorthFoldingAdd(Addr) &&
3187 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3188 Offset, /*IsPrefetch=*/false)) {
3189 // Insert an ADD instruction with the materialized Hi52 bits.
3190 Base = SDValue(
3191 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3192 0);
3193 return true;
3194 }
3195 }
3196
3197 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3198 /*IsPrefetch=*/false))
3199 return true;
3200
3201 Base = Addr;
3202 Offset = CurDAG->getTargetConstant(0, DL, VT);
3203 return true;
3204}
3205
3206/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3208 SDValue &Offset) {
3209 if (SelectAddrFrameIndex(Addr, Base, Offset))
3210 return true;
3211
3212 SDLoc DL(Addr);
3213 MVT VT = Addr.getSimpleValueType();
3214
3215 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3216 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3217 if (isUInt<9>(CVal)) {
3218 Base = Addr.getOperand(0);
3219
3220 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3221 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3222 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3223 return true;
3224 }
3225 }
3226
3227 Base = Addr;
3228 Offset = CurDAG->getTargetConstant(0, DL, VT);
3229 return true;
3230}
3231
3232/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3233/// Offset should be all zeros.
3234 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
3235 SDValue &Offset) {
3236 if (SelectAddrFrameIndex(Addr, Base, Offset))
3237 return true;
3238
3239 SDLoc DL(Addr);
3240 MVT VT = Addr.getSimpleValueType();
3241
3242 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3243 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3244 if (isInt<12>(CVal)) {
3245 Base = Addr.getOperand(0);
3246
3247 // Early-out if not a valid offset.
3248 if ((CVal & 0b11111) != 0) {
3249 Base = Addr;
3250 Offset = CurDAG->getTargetConstant(0, DL, VT);
3251 return true;
3252 }
3253
3254 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3255 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3256 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3257 return true;
3258 }
3259 }
3260
3261 // Handle ADD with large immediates.
3262 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3263 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3264 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3265
3266 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3267 // one instruction by folding the adjustment (-2048 or 2016) into the address.
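// For example, an offset of 3000 becomes (ADDI base, 984) with 2016 left as
// the prefetch offset; 2016 has its low 5 bits clear, as required.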
3268 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3269 int64_t Adj = CVal < 0 ? -2048 : 2016;
3270 int64_t AdjustedOffset = CVal - Adj;
3271 Base =
3272 SDValue(CurDAG->getMachineNode(
3273 RISCV::ADDI, DL, VT, Addr.getOperand(0),
3274 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3275 0);
3276 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3277 return true;
3278 }
3279
3280 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3281 Offset, /*IsPrefetch=*/true)) {
3282 // Insert an ADD instruction with the materialized Hi52 bits.
3283 Base = SDValue(
3284 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3285 0);
3286 return true;
3287 }
3288 }
3289
3290 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3291 /*IsPrefetch=*/true))
3292 return true;
3293
3294 Base = Addr;
3295 Offset = CurDAG->getTargetConstant(0, DL, VT);
3296 return true;
3297}
3298
3299 /// Return true if this is a load/store that we have a RegRegScale instruction for.
3300 static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
3301 const RISCVSubtarget &Subtarget) {
3302 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3303 return false;
3304 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3305 if (!(VT.isScalarInteger() &&
3306 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3307 !((VT == MVT::f32 || VT == MVT::f64) &&
3308 Subtarget.hasVendorXTHeadFMemIdx()))
3309 return false;
3310 // Don't allow stores of the value. It must be used as the address.
3311 if (User->getOpcode() == ISD::STORE &&
3312 cast<StoreSDNode>(User)->getValue() == Add)
3313 return false;
3314
3315 return true;
3316}
3317
3318/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3319/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3320/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3321/// single addi and we don't have a SHXADD instruction we could use.
3322/// FIXME: May still need to check how many and what kind of users the SHL has.
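/// For example, for (load (add (add (shl a, 2), b), 8)) the +8 can be folded
/// into an ADDI on b, so the remaining (add (add b, 8), (shl a, 2)) can still
/// be selected as a reg+reg scaled access when no SH2ADD is available.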
3323static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
3324 SDValue Add,
3325 SDValue Shift = SDValue()) {
3326 bool FoundADDI = false;
3327 for (auto *User : Add->users()) {
3328 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3329 continue;
3330
3331 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3332 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3333 !isa<ConstantSDNode>(User->getOperand(1)) ||
3334 !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3335 return false;
3336
3337 FoundADDI = true;
3338
3339 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3340 assert(Shift.getOpcode() == ISD::SHL);
3341 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3342 if (Subtarget.hasShlAdd(ShiftAmt))
3343 return false;
3344
3345 // All users of the ADDI should be load/store.
3346 for (auto *ADDIUser : User->users())
3347 if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3348 return false;
3349 }
3350
3351 return true;
3352}
3353
3354bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
3355 unsigned MaxShiftAmount,
3356 SDValue &Base, SDValue &Index,
3357 SDValue &Scale) {
3358 if (Addr.getOpcode() != ISD::ADD)
3359 return false;
3360 SDValue LHS = Addr.getOperand(0);
3361 SDValue RHS = Addr.getOperand(1);
3362
3363 EVT VT = Addr.getSimpleValueType();
3364 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3365 SDValue &Shift) {
3366 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3367 return false;
3368
3369 // Only match shifts by a value in range [0, MaxShiftAmount].
3370 unsigned ShiftAmt = N.getConstantOperandVal(1);
3371 if (ShiftAmt > MaxShiftAmount)
3372 return false;
3373
3374 Index = N.getOperand(0);
3375 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3376 return true;
3377 };
3378
3379 if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3380 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3381 if (LHS.getOpcode() == ISD::ADD &&
3382 !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3383 isInt<12>(C1->getSExtValue())) {
3384 if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3385 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3386 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3387 SDLoc(Addr), VT);
3388 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3389 LHS.getOperand(0), C1Val),
3390 0);
3391 return true;
3392 }
3393
3394 // Add is commutative so we need to check both operands.
3395 if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3396 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3397 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3398 SDLoc(Addr), VT);
3399 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3400 LHS.getOperand(1), C1Val),
3401 0);
3402 return true;
3403 }
3404 }
3405
3406 // Don't match add with constants.
3407 // FIXME: Is this profitable for large constants that have 0s in the lower
3408 // 12 bits that we can materialize with LUI?
3409 return false;
3410 }
3411
3412 // Try to match a shift on the RHS.
3413 if (SelectShl(RHS, Index, Scale)) {
3414 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3415 return false;
3416 Base = LHS;
3417 return true;
3418 }
3419
3420 // Try to match a shift on the LHS.
3421 if (SelectShl(LHS, Index, Scale)) {
3422 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3423 return false;
3424 Base = RHS;
3425 return true;
3426 }
3427
3428 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3429 return false;
3430
3431 Base = LHS;
3432 Index = RHS;
3433 Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3434 return true;
3435}
3436
3437bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
3438 unsigned MaxShiftAmount,
3439 unsigned Bits, SDValue &Base,
3440 SDValue &Index,
3441 SDValue &Scale) {
3442 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3443 return false;
3444
3445 if (Index.getOpcode() == ISD::AND) {
3446 auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
3447 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3448 Index = Index.getOperand(0);
3449 return true;
3450 }
3451 }
3452
3453 return false;
3454}
3455
3456bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3457 SDValue &Offset) {
3458 if (Addr.getOpcode() != ISD::ADD)
3459 return false;
3460
3461 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3462 return false;
3463
3464 Base = Addr.getOperand(0);
3465 Offset = Addr.getOperand(1);
3466 return true;
3467}
3468
3469bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
3470 SDValue &ShAmt) {
3471 ShAmt = N;
3472
3473 // Peek through zext.
3474 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3475 ShAmt = ShAmt.getOperand(0);
3476
3477 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3478 // amount. If there is an AND on the shift amount, we can bypass it if it
3479 // doesn't affect any of those bits.
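// For example, with ShiftWidth == 32, (srl X, (and Y, 31)) can drop the AND
// and shift directly by Y.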
3480 if (ShAmt.getOpcode() == ISD::AND &&
3481 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3482 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
3483
3484 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3485 // mask that covers the bits needed to represent all shift amounts.
3486 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3487 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3488
3489 if (ShMask.isSubsetOf(AndMask)) {
3490 ShAmt = ShAmt.getOperand(0);
3491 } else {
3492 // SimplifyDemandedBits may have optimized the mask so try restoring any
3493 // bits that are known zero.
3494 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
3495 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
3496 return true;
3497 ShAmt = ShAmt.getOperand(0);
3498 }
3499 }
3500
3501 if (ShAmt.getOpcode() == ISD::ADD &&
3502 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3503 uint64_t Imm = ShAmt.getConstantOperandVal(1);
3504 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3505 // to avoid the ADD.
3506 if (Imm != 0 && Imm % ShiftWidth == 0) {
3507 ShAmt = ShAmt.getOperand(0);
3508 return true;
3509 }
3510 } else if (ShAmt.getOpcode() == ISD::SUB &&
3511 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
3512 uint64_t Imm = ShAmt.getConstantOperandVal(0);
3513 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3514 // generate a NEG instead of a SUB of a constant.
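// For example, with ShiftWidth == 32, (srl X, (sub 32, Y)) can shift by
// (neg Y) instead, since only the low five bits of the amount are read.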
3515 if (Imm != 0 && Imm % ShiftWidth == 0) {
3516 SDLoc DL(ShAmt);
3517 EVT VT = ShAmt.getValueType();
3518 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
3519 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3520 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
3521 ShAmt.getOperand(1));
3522 ShAmt = SDValue(Neg, 0);
3523 return true;
3524 }
3525 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3526 // to generate a NOT instead of a SUB of a constant.
3527 if (Imm % ShiftWidth == ShiftWidth - 1) {
3528 SDLoc DL(ShAmt);
3529 EVT VT = ShAmt.getValueType();
3530 MachineSDNode *Not = CurDAG->getMachineNode(
3531 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
3532 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
3533 ShAmt = SDValue(Not, 0);
3534 return true;
3535 }
3536 }
3537
3538 return true;
3539}
3540
3541/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3542/// check for equality with 0. This function emits instructions that convert the
3543/// seteq/setne into something that can be compared with 0.
3544/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3545/// ISD::SETNE).
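/// For example, (setne a0, 5) can be selected as (ADDI a0, -5), which is zero
/// exactly when a0 == 5 and nonzero otherwise, so the caller only has to
/// compare the result against zero.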
3546bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
3547 SDValue &Val) {
3548 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3549 "Unexpected condition code!");
3550
3551 // We're looking for a setcc.
3552 if (N->getOpcode() != ISD::SETCC)
3553 return false;
3554
3555 // Must be an equality comparison.
3556 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
3557 if (CCVal != ExpectedCCVal)
3558 return false;
3559
3560 SDValue LHS = N->getOperand(0);
3561 SDValue RHS = N->getOperand(1);
3562
3563 if (!LHS.getValueType().isScalarInteger())
3564 return false;
3565
3566 // If the RHS is 0, we don't need any extra instructions; return the LHS.
3567 if (isNullConstant(RHS)) {
3568 Val = LHS;
3569 return true;
3570 }
3571
3572 SDLoc DL(N);
3573
3574 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
3575 int64_t CVal = C->getSExtValue();
3576 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3577 // non-zero otherwise.
3578 if (CVal == -2048) {
3579 Val = SDValue(
3580 CurDAG->getMachineNode(
3581 RISCV::XORI, DL, N->getValueType(0), LHS,
3582 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3583 0);
3584 return true;
3585 }
3586 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3587 // if the LHS is equal to the RHS and non-zero otherwise.
3588 if (isInt<12>(CVal) || CVal == 2048) {
3589 unsigned Opc = RISCV::ADDI;
3590 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3591 cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) {
3592 Opc = RISCV::ADDIW;
3593 LHS = LHS.getOperand(0);
3594 }
3595
3596 Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS,
3597 CurDAG->getSignedTargetConstant(
3598 -CVal, DL, N->getValueType(0))),
3599 0);
3600 return true;
3601 }
3602 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3603 Val = SDValue(
3604 CurDAG->getMachineNode(
3605 RISCV::BINVI, DL, N->getValueType(0), LHS,
3606 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3607 0);
3608 return true;
3609 }
3610 // Same as the addi case above but for larger immediates (signed 26-bit) use
3611 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3612 // anything which can be done with a single lui as it might be compressible.
3613 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
3614 (CVal & 0xFFF) != 0) {
3615 Val = SDValue(
3616 CurDAG->getMachineNode(
3617 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
3618 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
3619 0);
3620 return true;
3621 }
3622 }
3623
3624 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3625 // equal and a non-zero value if they aren't.
3626 Val = SDValue(
3627 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3628 return true;
3629}
3630
3631bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3632 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3633 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3634 Val = N.getOperand(0);
3635 return true;
3636 }
3637
3638 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3639 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3640 return N;
3641
3642 SDValue N0 = N.getOperand(0);
3643 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3644 N.getConstantOperandVal(1) == ShiftAmt &&
3645 N0.getConstantOperandVal(1) == ShiftAmt)
3646 return N0.getOperand(0);
3647
3648 return N;
3649 };
3650
3651 MVT VT = N.getSimpleValueType();
3652 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3653 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3654 return true;
3655 }
3656
3657 return false;
3658}
3659
3660bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3661 if (N.getOpcode() == ISD::AND) {
3662 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3663 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3664 Val = N.getOperand(0);
3665 return true;
3666 }
3667 }
3668 MVT VT = N.getSimpleValueType();
3669 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3670 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3671 Val = N;
3672 return true;
3673 }
3674
3675 return false;
3676}
3677
3678/// Look for various patterns that can be done with a SHL that can be folded
3679/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3680/// SHXADD we are trying to match.
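/// For example, for SH2ADD (ShAmt == 2), the operand (and (shl y, 1), ~3) can
/// be selected as (srli y, 1), since ((y >> 1) << 2) reproduces the same
/// masked value that the SH2ADD then adds to its other operand.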
3681bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3682 SDValue &Val) {
3683 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3684 SDValue N0 = N.getOperand(0);
3685
3686 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3687 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3688 isa<ConstantSDNode>(N0.getOperand(1))) {
3689 uint64_t Mask = N.getConstantOperandVal(1);
3690 unsigned C2 = N0.getConstantOperandVal(1);
3691
3692 unsigned XLen = Subtarget->getXLen();
3693 if (LeftShift)
3694 Mask &= maskTrailingZeros<uint64_t>(C2);
3695 else
3696 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3697
3698 if (isShiftedMask_64(Mask)) {
3699 unsigned Leading = XLen - llvm::bit_width(Mask);
3700 unsigned Trailing = llvm::countr_zero(Mask);
3701 if (Trailing != ShAmt)
3702 return false;
3703
3704 unsigned Opcode;
3705 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3706 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3707 // followed by a SHXADD with c3 for the X amount.
3708 if (LeftShift && Leading == 0 && C2 < Trailing)
3709 Opcode = RISCV::SRLI;
3710 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
3711 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
3712 // followed by a SHXADD with c3 for the X amount.
3713 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
3714 Opcode = RISCV::SRLIW;
3715 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3716 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3717 // followed by a SHXADD using c3 for the X amount.
3718 else if (!LeftShift && Leading == C2)
3719 Opcode = RISCV::SRLI;
3720 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
3721 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
3722 // followed by a SHXADD using c3 for the X amount.
3723 else if (!LeftShift && Leading == 32 + C2)
3724 Opcode = RISCV::SRLIW;
3725 else
3726 return false;
3727
3728 SDLoc DL(N);
3729 EVT VT = N.getValueType();
3730 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
3731 Val = SDValue(
3732 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
3733 CurDAG->getTargetConstant(ShAmt, DL, VT)),
3734 0);
3735 return true;
3736 }
3737 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3738 isa<ConstantSDNode>(N0.getOperand(1))) {
3739 uint64_t Mask = N.getConstantOperandVal(1);
3740 unsigned C2 = N0.getConstantOperandVal(1);
3741
3742 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3743 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3744 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3745 // the X amount.
3746 if (isShiftedMask_64(Mask)) {
3747 unsigned XLen = Subtarget->getXLen();
3748 unsigned Leading = XLen - llvm::bit_width(Mask);
3749 unsigned Trailing = llvm::countr_zero(Mask);
3750 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3751 SDLoc DL(N);
3752 EVT VT = N.getValueType();
3753 Val = SDValue(CurDAG->getMachineNode(
3754 RISCV::SRAI, DL, VT, N0.getOperand(0),
3755 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3756 0);
3757 Val = SDValue(CurDAG->getMachineNode(
3758 RISCV::SRLI, DL, VT, Val,
3759 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3760 0);
3761 return true;
3762 }
3763 }
3764 }
3765 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3766 (LeftShift || N.getOpcode() == ISD::SRL) &&
3767 isa<ConstantSDNode>(N.getOperand(1))) {
3768 SDValue N0 = N.getOperand(0);
3769 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3770 isa<ConstantSDNode>(N0.getOperand(1))) {
3771 uint64_t Mask = N0.getConstantOperandVal(1);
3772 if (isShiftedMask_64(Mask)) {
3773 unsigned C1 = N.getConstantOperandVal(1);
3774 unsigned XLen = Subtarget->getXLen();
3775 unsigned Leading = XLen - llvm::bit_width(Mask);
3776 unsigned Trailing = llvm::countr_zero(Mask);
3777 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3778 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3779 if (LeftShift && Leading == 32 && Trailing > 0 &&
3780 (Trailing + C1) == ShAmt) {
3781 SDLoc DL(N);
3782 EVT VT = N.getValueType();
3783 Val = SDValue(CurDAG->getMachineNode(
3784 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3785 CurDAG->getTargetConstant(Trailing, DL, VT)),
3786 0);
3787 return true;
3788 }
3789 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3790 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3791 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3792 (Trailing - C1) == ShAmt) {
3793 SDLoc DL(N);
3794 EVT VT = N.getValueType();
3795 Val = SDValue(CurDAG->getMachineNode(
3796 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3797 CurDAG->getTargetConstant(Trailing, DL, VT)),
3798 0);
3799 return true;
3800 }
3801 }
3802 }
3803 }
3804
3805 return false;
3806}
3807
3808/// Look for various patterns that can be done with a SHL that can be folded
3809/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3810/// SHXADD_UW we are trying to match.
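/// For example, for SH2ADD_UW (ShAmt == 2), the operand (and (shl y, 3),
/// 0x3FFFFFFF8) can be selected as (slli y, 1): zero-extending the low 32 bits
/// of (y << 1) and shifting by 2 yields the same masked value.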
3811bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3812 SDValue &Val) {
3813 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3814 N.hasOneUse()) {
3815 SDValue N0 = N.getOperand(0);
3816 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3817 N0.hasOneUse()) {
3818 uint64_t Mask = N.getConstantOperandVal(1);
3819 unsigned C2 = N0.getConstantOperandVal(1);
3820
3821 Mask &= maskTrailingZeros<uint64_t>(C2);
3822
3823 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3824 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3825 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3826 if (isShiftedMask_64(Mask)) {
3827 unsigned Leading = llvm::countl_zero(Mask);
3828 unsigned Trailing = llvm::countr_zero(Mask);
3829 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3830 SDLoc DL(N);
3831 EVT VT = N.getValueType();
3832 Val = SDValue(CurDAG->getMachineNode(
3833 RISCV::SLLI, DL, VT, N0.getOperand(0),
3834 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3835 0);
3836 return true;
3837 }
3838 }
3839 }
3840 }
3841
3842 return false;
3843}
3844
3845bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
3846 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
3847 if (N->getFlags().hasDisjoint())
3848 return true;
3849 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
3850}
3851
3852bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
3853 SDValue N, SDValue &Val) {
3854 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
3855 /*CompressionCost=*/true);
3856 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3857 /*CompressionCost=*/true);
3858 if (OrigCost <= Cost)
3859 return false;
3860
3861 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
3862 return true;
3863}
3864
3865bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
3866 if (!isa<ConstantSDNode>(N))
3867 return false;
3868 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3869 if ((Imm >> 31) != 1)
3870 return false;
3871
3872 for (const SDNode *U : N->users()) {
3873 switch (U->getOpcode()) {
3874 case ISD::ADD:
3875 break;
3876 case ISD::OR:
3877 if (orDisjoint(U))
3878 break;
3879 return false;
3880 default:
3881 return false;
3882 }
3883 }
3884
3885 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
3886}
3887
3888bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
3889 if (!isa<ConstantSDNode>(N))
3890 return false;
3891 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3892 if (isInt<32>(Imm))
3893 return false;
3894
3895 for (const SDNode *U : N->users()) {
3896 switch (U->getOpcode()) {
3897 case ISD::ADD:
3898 break;
3899 case RISCVISD::VMV_V_X_VL:
3900 if (!all_of(U->users(), [](const SDNode *V) {
3901 return V->getOpcode() == ISD::ADD ||
3902 V->getOpcode() == RISCVISD::ADD_VL;
3903 }))
3904 return false;
3905 break;
3906 default:
3907 return false;
3908 }
3909 }
3910
3911 return selectImm64IfCheaper(-Imm, Imm, N, Val);
3912}
3913
3914bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
3915 if (!isa<ConstantSDNode>(N))
3916 return false;
3917 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3918
3919 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
3920 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
3921 return false;
3922
3923 // Abandon this transform if the constant is needed elsewhere.
3924 for (const SDNode *U : N->users()) {
3925 switch (U->getOpcode()) {
3926 case ISD::AND:
3927 case ISD::OR:
3928 case ISD::XOR:
3929 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
3930 return false;
3931 break;
3932 case RISCVISD::VMV_V_X_VL:
3933 if (!Subtarget->hasStdExtZvkb())
3934 return false;
3935 if (!all_of(U->users(), [](const SDNode *V) {
3936 return V->getOpcode() == ISD::AND ||
3937 V->getOpcode() == RISCVISD::AND_VL;
3938 }))
3939 return false;
3940 break;
3941 default:
3942 return false;
3943 }
3944 }
3945
3946 if (isInt<32>(Imm)) {
3947 Val =
3948 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
3949 return true;
3950 }
3951
3952 // For 64-bit constants, the instruction sequences get complex,
3953 // so we select inverted only if it's cheaper.
3954 return selectImm64IfCheaper(~Imm, Imm, N, Val);
3955}
3956
3957static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3958 unsigned Bits,
3959 const TargetInstrInfo *TII) {
3960 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3961
3962 if (!MCOpcode)
3963 return false;
3964
3965 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3966 const uint64_t TSFlags = MCID.TSFlags;
3967 if (!RISCVII::hasSEWOp(TSFlags))
3968 return false;
3969 assert(RISCVII::hasVLOp(TSFlags));
3970
3971 unsigned ChainOpIdx = User->getNumOperands() - 1;
3972 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3973 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3974 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
3975 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3976
3977 if (UserOpNo == VLIdx)
3978 return false;
3979
3980 auto NumDemandedBits =
3981 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3982 return NumDemandedBits && Bits >= *NumDemandedBits;
3983}
3984
3985// Return true if all users of this SDNode* only consume the lower \p Bits.
3986// This can be used to form W instructions for add/sub/mul/shl even when the
3987// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3988// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3989// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3990// the add/sub/mul/shl to become non-W instructions. By checking the users we
3991// may be able to use a W instruction and CSE with the other instruction if
3992// this has happened. We could try to detect that the CSE opportunity exists
3993// before doing this, but that would be more complicated.
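// For example, if (add x, y) has one user that sees it through a sext_inreg
// (and therefore selects ADDW) and another user that only reads the low 32
// bits, selecting the plain add as ADDW as well lets the two instructions CSE.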
3994bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3995 const unsigned Depth) const {
3996 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3997 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3998 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3999 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
4000 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
4001 isa<ConstantSDNode>(Node) || Depth != 0) &&
4002 "Unexpected opcode");
4003
4004 if (Depth >= SelectionDAG::MaxRecursionDepth)
4005 return false;
4006
4007 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
4008 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
4009 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
4010 return false;
4011
4012 for (SDUse &Use : Node->uses()) {
4013 SDNode *User = Use.getUser();
4014 // Users of this node should have already been instruction selected
4015 if (!User->isMachineOpcode())
4016 return false;
4017
4018 // TODO: Add more opcodes?
4019 switch (User->getMachineOpcode()) {
4020 default:
4021 if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
4022 break;
4023 return false;
4024 case RISCV::ADDW:
4025 case RISCV::ADDIW:
4026 case RISCV::SUBW:
4027 case RISCV::MULW:
4028 case RISCV::SLLW:
4029 case RISCV::SLLIW:
4030 case RISCV::SRAW:
4031 case RISCV::SRAIW:
4032 case RISCV::SRLW:
4033 case RISCV::SRLIW:
4034 case RISCV::DIVW:
4035 case RISCV::DIVUW:
4036 case RISCV::REMW:
4037 case RISCV::REMUW:
4038 case RISCV::ROLW:
4039 case RISCV::RORW:
4040 case RISCV::RORIW:
4041 case RISCV::CLZW:
4042 case RISCV::CTZW:
4043 case RISCV::CPOPW:
4044 case RISCV::SLLI_UW:
4045 case RISCV::ABSW:
4046 case RISCV::FMV_W_X:
4047 case RISCV::FCVT_H_W:
4048 case RISCV::FCVT_H_W_INX:
4049 case RISCV::FCVT_H_WU:
4050 case RISCV::FCVT_H_WU_INX:
4051 case RISCV::FCVT_S_W:
4052 case RISCV::FCVT_S_W_INX:
4053 case RISCV::FCVT_S_WU:
4054 case RISCV::FCVT_S_WU_INX:
4055 case RISCV::FCVT_D_W:
4056 case RISCV::FCVT_D_W_INX:
4057 case RISCV::FCVT_D_WU:
4058 case RISCV::FCVT_D_WU_INX:
4059 case RISCV::TH_REVW:
4060 case RISCV::TH_SRRIW:
4061 if (Bits >= 32)
4062 break;
4063 return false;
4064 case RISCV::SLL:
4065 case RISCV::SRA:
4066 case RISCV::SRL:
4067 case RISCV::ROL:
4068 case RISCV::ROR:
4069 case RISCV::BSET:
4070 case RISCV::BCLR:
4071 case RISCV::BINV:
4072 // Shift amount operands only use log2(Xlen) bits.
4073 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
4074 break;
4075 return false;
4076 case RISCV::SLLI:
4077 // SLLI only uses the lower (XLen - ShAmt) bits.
4078 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
4079 break;
4080 return false;
4081 case RISCV::ANDI:
4082 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
4083 break;
4084 goto RecCheck;
4085 case RISCV::ORI: {
4086 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
4087 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
4088 break;
4089 [[fallthrough]];
4090 }
4091 case RISCV::AND:
4092 case RISCV::OR:
4093 case RISCV::XOR:
4094 case RISCV::XORI:
4095 case RISCV::ANDN:
4096 case RISCV::ORN:
4097 case RISCV::XNOR:
4098 case RISCV::SH1ADD:
4099 case RISCV::SH2ADD:
4100 case RISCV::SH3ADD:
4101 RecCheck:
4102 if (hasAllNBitUsers(User, Bits, Depth + 1))
4103 break;
4104 return false;
4105 case RISCV::SRLI: {
4106 unsigned ShAmt = User->getConstantOperandVal(1);
4107 // If we are shifting right by less than Bits, and users don't demand any
4108 // bits that were shifted into [Bits-1:0], then we can consider this as an
4109 // N-Bit user.
4110 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
4111 break;
4112 return false;
4113 }
4114 case RISCV::SEXT_B:
4115 case RISCV::PACKH:
4116 if (Bits >= 8)
4117 break;
4118 return false;
4119 case RISCV::SEXT_H:
4120 case RISCV::FMV_H_X:
4121 case RISCV::ZEXT_H_RV32:
4122 case RISCV::ZEXT_H_RV64:
4123 case RISCV::PACKW:
4124 if (Bits >= 16)
4125 break;
4126 return false;
4127 case RISCV::PACK:
4128 if (Bits >= (Subtarget->getXLen() / 2))
4129 break;
4130 return false;
4131 case RISCV::ADD_UW:
4132 case RISCV::SH1ADD_UW:
4133 case RISCV::SH2ADD_UW:
4134 case RISCV::SH3ADD_UW:
4135 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4136 // 32 bits.
4137 if (Use.getOperandNo() == 0 && Bits >= 32)
4138 break;
4139 return false;
4140 case RISCV::SB:
4141 if (Use.getOperandNo() == 0 && Bits >= 8)
4142 break;
4143 return false;
4144 case RISCV::SH:
4145 if (Use.getOperandNo() == 0 && Bits >= 16)
4146 break;
4147 return false;
4148 case RISCV::SW:
4149 if (Use.getOperandNo() == 0 && Bits >= 32)
4150 break;
4151 return false;
4152 case RISCV::TH_EXT:
4153 case RISCV::TH_EXTU: {
4154 unsigned Msb = User->getConstantOperandVal(1);
4155 unsigned Lsb = User->getConstantOperandVal(2);
4156 // Behavior of Msb < Lsb is not well documented.
4157 if (Msb >= Lsb && Bits > Msb)
4158 break;
4159 return false;
4160 }
4161 }
4162 }
4163
4164 return true;
4165}
4166
4167// Select a constant that can be represented as (sign_extend(imm5) << imm2).
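// For example, 96 matches as (12 << 3) and -64 as (-16 << 2), while 97 does
// not match because no shift amount in [0, 3] leaves a simm5.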
4168bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
4169 SDValue &Shl2) {
4170 auto *C = dyn_cast<ConstantSDNode>(N);
4171 if (!C)
4172 return false;
4173
4174 int64_t Offset = C->getSExtValue();
4175 for (unsigned Shift = 0; Shift < 4; Shift++) {
4176 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4177 EVT VT = N->getValueType(0);
4178 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
4179 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
4180 return true;
4181 }
4182 }
4183
4184 return false;
4185}
4186
4187// Select VL as a 5 bit immediate or a value that will become a register. This
4188// allows us to choose between VSETIVLI or VSETVLI later.
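// For example, a constant VL of 8 is kept as a 5-bit immediate (VSETIVLI), a
// constant of 40 is left to be materialized into a register (VSETVLI), and an
// all-ones VL is converted to the VLMAX sentinel.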
4189bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
4190 auto *C = dyn_cast<ConstantSDNode>(N);
4191 if (C && isUInt<5>(C->getZExtValue())) {
4192 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
4193 N->getValueType(0));
4194 } else if (C && C->isAllOnes()) {
4195 // Treat all ones as VLMax.
4196 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4197 N->getValueType(0));
4198 } else if (isa<RegisterSDNode>(N) &&
4199 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
4200 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4201 // as the register class. Convert X0 to a special immediate to pass the
4202 // MachineVerifier. This is recognized specially by the vsetvli insertion
4203 // pass.
4204 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4205 N->getValueType(0));
4206 } else {
4207 VL = N;
4208 }
4209
4210 return true;
4211}
4212
4213static SDValue findVSplat(SDValue N) {
4214 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4215 if (!N.getOperand(0).isUndef())
4216 return SDValue();
4217 N = N.getOperand(1);
4218 }
4219 SDValue Splat = N;
4220 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4221 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4222 !Splat.getOperand(0).isUndef())
4223 return SDValue();
4224 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4225 return Splat;
4226}
4227
4228bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
4229 SDValue Splat = findVSplat(N);
4230 if (!Splat)
4231 return false;
4232
4233 SplatVal = Splat.getOperand(1);
4234 return true;
4235}
4236
4237static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
4238 SelectionDAG &DAG,
4239 const RISCVSubtarget &Subtarget,
4240 std::function<bool(int64_t)> ValidateImm,
4241 bool Decrement = false) {
4242 SDValue Splat = findVSplat(N);
4243 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
4244 return false;
4245
4246 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4247 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4248 "Unexpected splat operand type");
4249
4250 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4251 // type is wider than the resulting vector element type: an implicit
4252 // truncation first takes place. Therefore, perform a manual
4253 // truncation/sign-extension in order to ignore any truncated bits and catch
4254 // any zero-extended immediate.
4255 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4256 // sign-extending to (XLenVT -1).
4257 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
4258
4259 int64_t SplatImm = SplatConst.getSExtValue();
4260
4261 if (!ValidateImm(SplatImm))
4262 return false;
4263
4264 if (Decrement)
4265 SplatImm -= 1;
4266
4267 SplatVal =
4268 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
4269 return true;
4270}
4271
4272bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
4273 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
4274 [](int64_t Imm) { return isInt<5>(Imm); });
4275}
4276
4277bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
4278 return selectVSplatImmHelper(
4279 N, SplatVal, *CurDAG, *Subtarget,
4280 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
4281 /*Decrement=*/true);
4282}
4283
4283bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N,
4284 SDValue &SplatVal) {
4285 return selectVSplatImmHelper(
4286 N, SplatVal, *CurDAG, *Subtarget,
4287 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
4288 /*Decrement=*/false);
4289}
4290
4291bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
4292 SDValue &SplatVal) {
4293 return selectVSplatImmHelper(
4294 N, SplatVal, *CurDAG, *Subtarget,
4295 [](int64_t Imm) {
4296 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
4297 },
4298 /*Decrement=*/true);
4299}
4300
4301bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
4302 SDValue &SplatVal) {
4303 return selectVSplatImmHelper(
4304 N, SplatVal, *CurDAG, *Subtarget,
4305 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
4306}
4307
4308bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
4309 SDValue Splat = findVSplat(N);
4310 return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
4311}
4312
4313bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
4314 auto IsExtOrTrunc = [](SDValue N) {
4315 switch (N->getOpcode()) {
4316 case ISD::SIGN_EXTEND:
4317 case ISD::ZERO_EXTEND:
4318 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4319 // inactive elements will be undef.
4320 case RISCVISD::TRUNCATE_VECTOR_VL:
4321 case RISCVISD::VSEXT_VL:
4322 case RISCVISD::VZEXT_VL:
4323 return true;
4324 default:
4325 return false;
4326 }
4327 };
4328
4329 // We can have multiple nested nodes, so unravel them all if needed.
4330 while (IsExtOrTrunc(N)) {
4331 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4332 return false;
4333 N = N->getOperand(0);
4334 }
4335
4336 return selectVSplat(N, SplatVal);
4337}
4338
4339bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
4340 // Allow bitcasts from XLenVT -> FP.
4341 if (N.getOpcode() == ISD::BITCAST &&
4342 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
4343 Imm = N.getOperand(0);
4344 return true;
4345 }
4346 // Allow moves from XLenVT to FP.
4347 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4348 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4349 Imm = N.getOperand(0);
4350 return true;
4351 }
4352
4353 // Otherwise, look for FP constants that can be materialized with scalar int.
4354 auto *CFP = dyn_cast<ConstantFPSDNode>(N);
4355 if (!CFP)
4356 return false;
4357 const APFloat &APF = CFP->getValueAPF();
4358 // td can handle +0.0 already.
4359 if (APF.isPosZero())
4360 return false;
4361
4362 MVT VT = CFP->getSimpleValueType(0);
4363
4364 MVT XLenVT = Subtarget->getXLenVT();
4365 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4366 assert(APF.isNegZero() && "Unexpected constant.");
4367 return false;
4368 }
4369 SDLoc DL(N);
4370 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
4371 *Subtarget);
4372 return true;
4373}
4374
4375bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
4376 SDValue &Imm) {
4377 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4378 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4379
4380 if (!isInt<5>(ImmVal))
4381 return false;
4382
4383 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4384 Subtarget->getXLenVT());
4385 return true;
4386 }
4387
4388 return false;
4389}
4390
4391// Try to remove sext.w if the input is a W instruction or can be made into
4392// a W instruction cheaply.
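// For example, (ADDIW (ADD a0, a1), 0) becomes ADDW a0, a1, and an ADDIW whose
// input is already an ADDW (or another W instruction) is simply removed.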
4393bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4394 // Look for the sext.w pattern, addiw rd, rs1, 0.
4395 if (N->getMachineOpcode() != RISCV::ADDIW ||
4396 !isNullConstant(N->getOperand(1)))
4397 return false;
4398
4399 SDValue N0 = N->getOperand(0);
4400 if (!N0.isMachineOpcode())
4401 return false;
4402
4403 switch (N0.getMachineOpcode()) {
4404 default:
4405 break;
4406 case RISCV::ADD:
4407 case RISCV::ADDI:
4408 case RISCV::SUB:
4409 case RISCV::MUL:
4410 case RISCV::SLLI: {
4411 // Convert sext.w+add/sub/mul to their W instructions. This will create
4412 // a new independent instruction. This improves latency.
4413 unsigned Opc;
4414 switch (N0.getMachineOpcode()) {
4415 default:
4416 llvm_unreachable("Unexpected opcode!");
4417 case RISCV::ADD: Opc = RISCV::ADDW; break;
4418 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4419 case RISCV::SUB: Opc = RISCV::SUBW; break;
4420 case RISCV::MUL: Opc = RISCV::MULW; break;
4421 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4422 }
4423
4424 SDValue N00 = N0.getOperand(0);
4425 SDValue N01 = N0.getOperand(1);
4426
4427 // Shift amount needs to be uimm5.
4428 if (N0.getMachineOpcode() == RISCV::SLLI &&
4429 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4430 break;
4431
4432 SDNode *Result =
4433 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4434 N00, N01);
4435 ReplaceUses(N, Result);
4436 return true;
4437 }
4438 case RISCV::ADDW:
4439 case RISCV::ADDIW:
4440 case RISCV::SUBW:
4441 case RISCV::MULW:
4442 case RISCV::SLLIW:
4443 case RISCV::PACKW:
4444 case RISCV::TH_MULAW:
4445 case RISCV::TH_MULAH:
4446 case RISCV::TH_MULSW:
4447 case RISCV::TH_MULSH:
4448 if (N0.getValueType() == MVT::i32)
4449 break;
4450
4451 // Result is already sign extended just remove the sext.w.
4452 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4453 ReplaceUses(N, N0.getNode());
4454 return true;
4455 }
4456
4457 return false;
4458}
4459
4460static bool usesAllOnesMask(SDValue MaskOp) {
4461 const auto IsVMSet = [](unsigned Opc) {
4462 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4463 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4464 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4465 Opc == RISCV::PseudoVMSET_M_B8;
4466 };
4467
4468 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4469 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4470 // assume that it's all-ones? Same applies to its VL.
4471 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4472}
4473
4474static bool isImplicitDef(SDValue V) {
4475 if (!V.isMachineOpcode())
4476 return false;
4477 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4478 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4479 if (!isImplicitDef(V.getOperand(I)))
4480 return false;
4481 return true;
4482 }
4483 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4484}
4485
4486// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4487// corresponding "unmasked" pseudo versions.
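// For example, a PseudoVADD_VV_*_MASK whose mask comes from PseudoVMSET_M_B*
// can be rewritten to the unmasked PseudoVADD_VV_* form, dropping the mask
// operand (and the policy operand if the unmasked form has none).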
4488bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4489 const RISCV::RISCVMaskedPseudoInfo *I =
4490 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4491 if (!I)
4492 return false;
4493
4494 unsigned MaskOpIdx = I->MaskOpIdx;
4495 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4496 return false;
4497
4498 // There are two classes of pseudos in the table - compares and
4499 // everything else. See the comment on RISCVMaskedPseudo for details.
4500 const unsigned Opc = I->UnmaskedPseudo;
4501 const MCInstrDesc &MCID = TII->get(Opc);
4502 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4503
4504 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4505 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4506
4507 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4508 !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
4509 "Unmasked pseudo has policy but masked pseudo doesn't?");
4510 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4511 "Unexpected pseudo structure");
4512 assert(!(HasPassthru && !MaskedHasPassthru) &&
4513 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4514
4515 SmallVector<SDValue, 8> Ops;
4516 // Skip the passthru operand at index 0 if the unmasked pseudo doesn't have one.
4517 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4518 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4519 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
4520 bool HasChainOp =
4521 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
4522 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4523 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4524 // Skip the mask
4525 SDValue Op = N->getOperand(I);
4526 if (I == MaskOpIdx)
4527 continue;
4528 if (DropPolicy && I == LastOpNum)
4529 continue;
4530 Ops.push_back(Op);
4531 }
4532
4533 MachineSDNode *Result =
4534 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4535
4536 if (!N->memoperands_empty())
4537 CurDAG->setNodeMemRefs(Result, N->memoperands());
4538
4539 Result->setFlags(N->getFlags());
4540 ReplaceUses(N, Result);
4541
4542 return true;
4543}
4544
4545/// If our passthru is an implicit_def, use noreg instead. This side
4546/// steps issues with MachineCSE not being able to CSE expressions with
4547/// IMPLICIT_DEF operands while preserving the semantic intent. See
4548/// pr64282 for context. Note that this transform is the last one
4549/// performed at ISEL DAG to DAG.
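/// For example, a vector pseudo whose passthru operand is an IMPLICIT_DEF (or
/// a REG_SEQUENCE of IMPLICIT_DEFs) gets that operand replaced with $noreg.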
4550bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4551 bool MadeChange = false;
4552 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4553
4554 while (Position != CurDAG->allnodes_begin()) {
4555 SDNode *N = &*--Position;
4556 if (N->use_empty() || !N->isMachineOpcode())
4557 continue;
4558
4559 const unsigned Opc = N->getMachineOpcode();
4560 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4561 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4562 !isImplicitDef(N->getOperand(0)))
4563 continue;
4564
4565 SmallVector<SDValue, 8> Ops;
4566 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4567 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4568 SDValue Op = N->getOperand(I);
4569 Ops.push_back(Op);
4570 }
4571
4572 MachineSDNode *Result =
4573 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4574 Result->setFlags(N->getFlags());
4575 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4576 ReplaceUses(N, Result);
4577 MadeChange = true;
4578 }
4579 return MadeChange;
4580}
4581
4582
4583// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4584// for instruction scheduling.
4585FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4586 CodeGenOptLevel OptLevel) {
4587 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4588}
4589
4590char RISCVDAGToDAGISelLegacy::ID = 0;
4591
4592RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4593 CodeGenOptLevel OptLevel)
4594 : SelectionDAGISelLegacy(
4595 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4596
4597INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
unsigned M1(unsigned Val)
Definition VE.h:377
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr T maskTrailingZeros(unsigned N)
Create a bitmask with the N right-most bits set to 0, and all other bits set to 1.
Definition MathExtras.h:94
@ Add
Sum of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.