1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
24#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
34 "riscv-use-rematerializable-movimm", cl::Hidden,
35 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
36 "constant materialization"),
37 cl::init(false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
42void RISCVDAGToDAGISel::PreprocessISelDAG() {
43 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
44
45 bool MadeChange = false;
46 while (Position != CurDAG->allnodes_begin()) {
47 SDNode *N = &*--Position;
48 if (N->use_empty())
49 continue;
50
51 SDValue Result;
52 switch (N->getOpcode()) {
53 case ISD::SPLAT_VECTOR: {
54 if (Subtarget->enablePExtCodeGen())
55 break;
56 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
57 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
58 MVT VT = N->getSimpleValueType(0);
59 unsigned Opc =
60 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
61 SDLoc DL(N);
62 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
63 SDValue Src = N->getOperand(0);
64 if (VT.isInteger())
65 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
66 N->getOperand(0));
67 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
68 break;
69 }
70 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
71 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
72 // load. Done after lowering and combining so that we have a chance to
73 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
74 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
75 MVT VT = N->getSimpleValueType(0);
76 SDValue Passthru = N->getOperand(0);
77 SDValue Lo = N->getOperand(1);
78 SDValue Hi = N->getOperand(2);
79 SDValue VL = N->getOperand(3);
80 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
81 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
82 "Unexpected VTs!");
83 MachineFunction &MF = CurDAG->getMachineFunction();
84 SDLoc DL(N);
85
86 // Create temporary stack for each expanding node.
87 SDValue StackSlot =
88 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
89 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
90 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
91
92 SDValue Chain = CurDAG->getEntryNode();
93 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
94
95 SDValue OffsetSlot =
96 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
97 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
98 Align(8));
99
100 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
101
102 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
103 SDValue IntID =
104 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
105 SDValue Ops[] = {Chain,
106 IntID,
107 Passthru,
108 StackSlot,
109 CurDAG->getRegister(RISCV::X0, MVT::i64),
110 VL};
111
112 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
113 MVT::i64, MPI, Align(8),
114 MachineMemOperand::MOLoad);
115 break;
116 }
117 case ISD::FP_EXTEND: {
118 // We only have vector patterns for riscv_fpextend_vl in isel.
119 SDLoc DL(N);
120 MVT VT = N->getSimpleValueType(0);
121 if (!VT.isVector())
122 break;
123 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
124 SDValue TrueMask = CurDAG->getNode(
125 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
126 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
127 TrueMask, VLMAX);
128 break;
129 }
130 }
131
132 if (Result) {
133 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
134 LLVM_DEBUG(N->dump(CurDAG));
135 LLVM_DEBUG(dbgs() << "\nNew: ");
136 LLVM_DEBUG(Result->dump(CurDAG));
137 LLVM_DEBUG(dbgs() << "\n");
138
139 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
140 MadeChange = true;
141 }
142 }
143
144 if (MadeChange)
145 CurDAG->RemoveDeadNodes();
146}
147
148void RISCVDAGToDAGISel::PostprocessISelDAG() {
149 HandleSDNode Dummy(CurDAG->getRoot());
150 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
151
152 bool MadeChange = false;
153 while (Position != CurDAG->allnodes_begin()) {
154 SDNode *N = &*--Position;
155 // Skip dead nodes and any non-machine opcodes.
156 if (N->use_empty() || !N->isMachineOpcode())
157 continue;
158
159 MadeChange |= doPeepholeSExtW(N);
160
161 // FIXME: This is here only because the VMerge transform doesn't
162 // know how to handle masked true inputs. Once that has been moved
163 // to post-ISEL, this can be deleted as well.
164 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
165 }
166
167 CurDAG->setRoot(Dummy.getValue());
168
169 // After we're done with everything else, convert IMPLICIT_DEF
170 // passthru operands to NoRegister. This is required to work around
171 // an optimization deficiency in MachineCSE. This really should
172 // be merged back into each of the patterns (i.e. there's no good
173 // reason not to go directly to NoReg), but is being done this way
174 // to allow easy backporting.
175 MadeChange |= doPeepholeNoRegPassThru();
176
177 if (MadeChange)
178 CurDAG->RemoveDeadNodes();
179}
180
181static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
182 RISCVMatInt::InstSeq &Seq) {
183 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
184 for (const RISCVMatInt::Inst &Inst : Seq) {
185 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
186 SDNode *Result = nullptr;
187 switch (Inst.getOpndKind()) {
188 case RISCVMatInt::Imm:
189 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
190 break;
191 case RISCVMatInt::RegX0:
192 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
193 CurDAG->getRegister(RISCV::X0, VT));
194 break;
195 case RISCVMatInt::RegReg:
196 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
197 break;
198 case RISCVMatInt::RegImm:
199 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
200 break;
201 }
202
203 // Only the first instruction has X0 as its source.
204 SrcReg = SDValue(Result, 0);
205 }
206
207 return SrcReg;
208}
209
210static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
211 int64_t Imm, const RISCVSubtarget &Subtarget) {
212 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
213
214 // Use a rematerializable pseudo instruction for short sequences if enabled.
215 if (Seq.size() == 2 && UsePseudoMovImm)
216 return SDValue(
217 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
218 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
219 0);
220
221 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
222 // worst an LUI+ADDIW. This will require an extra register, but avoids a
223 // constant pool.
224 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
225 // low and high 32 bits are the same and bit 31 and 63 are set.
226 if (Seq.size() > 3) {
227 unsigned ShiftAmt, AddOpc;
228 RISCVMatInt::InstSeq SeqLo =
229 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
230 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
231 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
232
233 SDValue SLLI = SDValue(
234 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
235 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
236 0);
237 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
238 }
239 }
240
241 // Otherwise, use the original sequence.
242 return selectImmSeq(CurDAG, DL, VT, Seq);
243}
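// Editor's note (illustrative addition, not part of the original file): a
// typical two-instruction sequence produced by selectImmSeq/selectImm above.
// Materializing 0x12345678 on RV64 yields roughly
//   lui   a0, 0x12345      ; upper 20 bits
//   addiw a0, a0, 0x678    ; lower 12 bits
// (the register choice is arbitrary). With -riscv-use-rematerializable-movimm,
// such two-instruction constants are instead emitted as a single PseudoMovImm
// so the register allocator can rematerialize them.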
244
245void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
246 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
247 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
248 bool IsLoad, MVT *IndexVT) {
249 SDValue Chain = Node->getOperand(0);
250
251 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
252
253 if (IsStridedOrIndexed) {
254 Operands.push_back(Node->getOperand(CurOp++)); // Index.
255 if (IndexVT)
256 *IndexVT = Operands.back()->getSimpleValueType(0);
257 }
258
259 if (IsMasked) {
260 SDValue Mask = Node->getOperand(CurOp++);
261 Operands.push_back(Mask);
262 }
263 SDValue VL;
264 selectVLOp(Node->getOperand(CurOp++), VL);
265 Operands.push_back(VL);
266
267 MVT XLenVT = Subtarget->getXLenVT();
268 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
269 Operands.push_back(SEWOp);
270
271 // At the IR layer, all the masked load intrinsics have policy operands,
272 // none of the others do. All have passthru operands. For our pseudos,
273 // all loads have policy operands.
274 if (IsLoad) {
275 uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
276 if (IsMasked)
277 Policy = Node->getConstantOperandVal(CurOp++);
278 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
279 Operands.push_back(PolicyOp);
280 }
281
282 Operands.push_back(Chain); // Chain.
283}
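// Editor's note (illustrative addition, not part of the original file): for a
// masked, strided segment load the helper above appends operands in the order
//   { base, stride, mask, VL, log2(SEW), policy, chain }
// after the caller has already pushed the passthru. Unmasked or unit-stride
// forms simply omit the mask/stride entries, and stores omit the policy
// operand.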
284
285void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
286 bool IsStrided) {
287 SDLoc DL(Node);
288 MVT VT = Node->getSimpleValueType(0);
289 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
290 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
291
292 unsigned CurOp = 2;
293 SmallVector<SDValue, 8> Operands;
294
295 Operands.push_back(Node->getOperand(CurOp++));
296
297 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
298 Operands, /*IsLoad=*/true);
299
300 const RISCV::VLSEGPseudo *P =
301 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
302 static_cast<unsigned>(LMUL));
303 MachineSDNode *Load =
304 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
305
306 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
307
308 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
309 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
310 CurDAG->RemoveDeadNode(Node);
311}
312
313void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
314 bool IsMasked) {
315 SDLoc DL(Node);
316 MVT VT = Node->getSimpleValueType(0);
317 MVT XLenVT = Subtarget->getXLenVT();
318 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
319 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
320
321 unsigned CurOp = 2;
322 SmallVector<SDValue, 8> Operands;
323
324 Operands.push_back(Node->getOperand(CurOp++));
325
326 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
327 /*IsStridedOrIndexed*/ false, Operands,
328 /*IsLoad=*/true);
329
330 const RISCV::VLSEGPseudo *P =
331 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
332 Log2SEW, static_cast<unsigned>(LMUL));
333 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
334 XLenVT, MVT::Other, Operands);
335
336 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
337
338 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
339 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
340 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
341 CurDAG->RemoveDeadNode(Node);
342}
343
344void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
345 bool IsOrdered) {
346 SDLoc DL(Node);
347 MVT VT = Node->getSimpleValueType(0);
348 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
349 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
350
351 unsigned CurOp = 2;
352 SmallVector<SDValue, 8> Operands;
353
354 Operands.push_back(Node->getOperand(CurOp++));
355
356 MVT IndexVT;
357 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
358 /*IsStridedOrIndexed*/ true, Operands,
359 /*IsLoad=*/true, &IndexVT);
360
361#ifndef NDEBUG
362 // Number of elements = RVVBitsPerBlock * LMUL / SEW
363 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
364 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
365 if (DecodedLMUL.second)
366 ContainedTyNumElts /= DecodedLMUL.first;
367 else
368 ContainedTyNumElts *= DecodedLMUL.first;
369 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
370 "Element count mismatch");
371#endif
372
373 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
374 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
375 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
376 reportFatalUsageError("The V extension does not support EEW=64 for index "
377 "values when XLEN=32");
378 }
379 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
380 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
381 static_cast<unsigned>(IndexLMUL));
382 MachineSDNode *Load =
383 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
384
385 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
386
387 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
388 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
389 CurDAG->RemoveDeadNode(Node);
390}
391
392void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
393 bool IsStrided) {
394 SDLoc DL(Node);
395 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
396 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
397 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
398
399 unsigned CurOp = 2;
400 SmallVector<SDValue, 8> Operands;
401
402 Operands.push_back(Node->getOperand(CurOp++));
403
404 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
405 Operands);
406
407 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
408 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
409 MachineSDNode *Store =
410 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
411
412 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
413
414 ReplaceNode(Node, Store);
415}
416
417void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
418 bool IsOrdered) {
419 SDLoc DL(Node);
420 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
421 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
422 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
423
424 unsigned CurOp = 2;
425 SmallVector<SDValue, 8> Operands;
426
427 Operands.push_back(Node->getOperand(CurOp++));
428
429 MVT IndexVT;
430 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
431 /*IsStridedOrIndexed*/ true, Operands,
432 /*IsLoad=*/false, &IndexVT);
433
434#ifndef NDEBUG
435 // Number of elements = RVVBitsPerBlock * LMUL / SEW
436 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
437 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
438 if (DecodedLMUL.second)
439 ContainedTyNumElts /= DecodedLMUL.first;
440 else
441 ContainedTyNumElts *= DecodedLMUL.first;
442 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
443 "Element count mismatch");
444#endif
445
446 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
447 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
448 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449 reportFatalUsageError("The V extension does not support EEW=64 for index "
450 "values when XLEN=32");
451 }
452 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
453 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454 static_cast<unsigned>(IndexLMUL));
455 MachineSDNode *Store =
456 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
457
458 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
459
460 ReplaceNode(Node, Store);
461}
462
463void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
464 if (!Subtarget->hasVInstructions())
465 return;
466
467 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
468
469 SDLoc DL(Node);
470 MVT XLenVT = Subtarget->getXLenVT();
471
472 unsigned IntNo = Node->getConstantOperandVal(0);
473
474 assert((IntNo == Intrinsic::riscv_vsetvli ||
475 IntNo == Intrinsic::riscv_vsetvlimax) &&
476 "Unexpected vsetvli intrinsic");
477
478 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
479 unsigned Offset = (VLMax ? 1 : 2);
480
481 assert(Node->getNumOperands() == Offset + 2 &&
482 "Unexpected number of operands");
483
484 unsigned SEW =
485 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
486 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
487 Node->getConstantOperandVal(Offset + 1) & 0x7);
488
489 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
490 /*MaskAgnostic*/ true);
491 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
492
493 SDValue VLOperand;
494 unsigned Opcode = RISCV::PseudoVSETVLI;
495 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
496 if (auto VLEN = Subtarget->getRealVLen())
497 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
498 VLMax = true;
499 }
500 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
501 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
502 Opcode = RISCV::PseudoVSETVLIX0;
503 } else {
504 VLOperand = Node->getOperand(1);
505
506 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
507 uint64_t AVL = C->getZExtValue();
508 if (isUInt<5>(AVL)) {
509 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
510 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
511 XLenVT, VLImm, VTypeIOp));
512 return;
513 }
514 }
515 }
516
517 ReplaceNode(Node,
518 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
519}
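// Editor's note (illustrative addition, not part of the original file): sketch
// of how the AVL operand above picks a pseudo:
//   vsetvli with a constant AVL <= 31   -> PseudoVSETIVLI  imm, vtype
//   vsetvlimax or an all-ones AVL       -> PseudoVSETVLIX0 x0, vtype
//   any other AVL value                 -> PseudoVSETVLI   reg, vtype
// A constant AVL that provably equals VLMAX for the known VLEN is also folded
// into the X0 form.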
520
521void RISCVDAGToDAGISel::selectXSfmmVSET(SDNode *Node) {
522 if (!Subtarget->hasVendorXSfmmbase())
523 return;
524
525 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
526
527 SDLoc DL(Node);
528 MVT XLenVT = Subtarget->getXLenVT();
529
530 unsigned IntNo = Node->getConstantOperandVal(0);
531
532 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
533 IntNo == Intrinsic::riscv_sf_vsettm ||
534 IntNo == Intrinsic::riscv_sf_vsettk) &&
535 "Unexpected XSfmm vset intrinsic");
536
537 unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
538 unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
539 unsigned PseudoOpCode =
540 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
541 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
542 : RISCV::PseudoSF_VSETTK;
543
544 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
545 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0);
546 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
547
548 ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
549 Node->getOperand(1), VTypeIOp));
550 } else {
551 SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT);
552 SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT);
553 ReplaceNode(Node,
554 CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
555 Node->getOperand(1), Log2SEW, TWiden));
556 }
557}
558
559bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
560 MVT VT = Node->getSimpleValueType(0);
561 unsigned Opcode = Node->getOpcode();
562 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
563 "Unexpected opcode");
564 SDLoc DL(Node);
565
566 // For operations of the form (x << C1) op C2, check if we can use
567 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
568 SDValue N0 = Node->getOperand(0);
569 SDValue N1 = Node->getOperand(1);
570
571 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
572 if (!Cst)
573 return false;
574
575 int64_t Val = Cst->getSExtValue();
576
577 // Check if immediate can already use ANDI/ORI/XORI.
578 if (isInt<12>(Val))
579 return false;
580
581 SDValue Shift = N0;
582
583 // If Val is simm32 and we have a sext_inreg from i32, then the binop
584 // produces at least 33 sign bits. We can peek through the sext_inreg and use
585 // a SLLIW at the end.
586 bool SignExt = false;
587 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
588 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
589 SignExt = true;
590 Shift = N0.getOperand(0);
591 }
592
593 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
594 return false;
595
596 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
597 if (!ShlCst)
598 return false;
599
600 uint64_t ShAmt = ShlCst->getZExtValue();
601
602 // Make sure that we don't change the operation by removing bits.
603 // This only matters for OR and XOR, AND is unaffected.
604 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
605 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
606 return false;
607
608 int64_t ShiftedVal = Val >> ShAmt;
609 if (!isInt<12>(ShiftedVal))
610 return false;
611
612 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
613 if (SignExt && ShAmt >= 32)
614 return false;
615
616 // Ok, we can reorder to get a smaller immediate.
617 unsigned BinOpc;
618 switch (Opcode) {
619 default: llvm_unreachable("Unexpected opcode");
620 case ISD::AND: BinOpc = RISCV::ANDI; break;
621 case ISD::OR: BinOpc = RISCV::ORI; break;
622 case ISD::XOR: BinOpc = RISCV::XORI; break;
623 }
624
625 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
626
627 SDNode *BinOp = CurDAG->getMachineNode(
628 BinOpc, DL, VT, Shift.getOperand(0),
629 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
630 SDNode *SLLI =
631 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
632 CurDAG->getTargetConstant(ShAmt, DL, VT));
633 ReplaceNode(Node, SLLI);
634 return true;
635}
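// Editor's note (illustrative addition, not part of the original file): a
// worked example of the reordering above. For (xor (shl x, 8), 0x3f00) the
// constant 0x3f00 does not fit in a 12-bit immediate, but 0x3f00 >> 8 = 0x3f
// does, so the node is reselected as (shl (xor x, 0x3f), 8), i.e.
//   xori t0, x, 0x3f
//   slli t0, t0, 8
// with no separate constant materialization.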
636
637bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
638 unsigned Opc;
639
640 if (Subtarget->hasVendorXTHeadBb())
641 Opc = RISCV::TH_EXT;
642 else if (Subtarget->hasVendorXAndesPerf())
643 Opc = RISCV::NDS_BFOS;
644 else if (Subtarget->hasVendorXqcibm())
645 Opc = RISCV::QC_EXT;
646 else
647 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
648 return false;
649
650 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
651 if (!N1C)
652 return false;
653
654 SDValue N0 = Node->getOperand(0);
655 if (!N0.hasOneUse())
656 return false;
657
658 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
659 const SDLoc &DL, MVT VT) {
660 if (Opc == RISCV::QC_EXT) {
661 // QC.EXT X, width, shamt
662 // shamt is the same as Lsb
663 // width is the number of bits to extract from the Lsb
664 Msb = Msb - Lsb + 1;
665 }
666 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
667 CurDAG->getTargetConstant(Msb, DL, VT),
668 CurDAG->getTargetConstant(Lsb, DL, VT));
669 };
670
671 SDLoc DL(Node);
672 MVT VT = Node->getSimpleValueType(0);
673 const unsigned RightShAmt = N1C->getZExtValue();
674
675 // Transform (sra (shl X, C1) C2) with C1 < C2
676 // -> (SignedBitfieldExtract X, msb, lsb)
677 if (N0.getOpcode() == ISD::SHL) {
678 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
679 if (!N01C)
680 return false;
681
682 const unsigned LeftShAmt = N01C->getZExtValue();
683 // Make sure that this is a bitfield extraction (i.e., the shift-right
684 // amount can not be less than the left-shift).
685 if (LeftShAmt > RightShAmt)
686 return false;
687
688 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
689 const unsigned Msb = MsbPlusOne - 1;
690 const unsigned Lsb = RightShAmt - LeftShAmt;
691
692 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
693 ReplaceNode(Node, Sbe);
694 return true;
695 }
696
697 // Transform (sra (sext_inreg X, _), C) ->
698 // (SignedBitfieldExtract X, msb, lsb)
699 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
700 unsigned ExtSize =
701 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
702
703 // ExtSize of 32 should use sraiw via tablegen pattern.
704 if (ExtSize == 32)
705 return false;
706
707 const unsigned Msb = ExtSize - 1;
708 // If the shift-right amount is greater than Msb, the operation extracts
709 // the X[Msb] bit and sign-extends it.
710 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
711
712 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
713 ReplaceNode(Node, Sbe);
714 return true;
715 }
716
717 return false;
718}
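// Editor's note (illustrative addition, not part of the original file): for
// the (sra (shl X, C1), C2) form above on RV64 with XTHeadBb, the node
// (sra (shl X, 40), 48) gives Msb = 64 - 40 - 1 = 23 and Lsb = 48 - 40 = 8,
// so it selects TH.EXT X, 23, 8 (bits [23:8] of X, sign-extended). With
// Xqcibm the same operands are re-encoded as (width, shamt) =
// (Msb - Lsb + 1, Lsb).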
719
720bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
721 // Only supported with XAndesPerf at the moment.
722 if (!Subtarget->hasVendorXAndesPerf())
723 return false;
724
725 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
726 if (!N1C)
727 return false;
728
729 SDValue N0 = Node->getOperand(0);
730 if (!N0.hasOneUse())
731 return false;
732
733 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
734 const SDLoc &DL, MVT VT) {
735 unsigned Opc = RISCV::NDS_BFOS;
736 // If the Lsb is equal to the Msb, then the Lsb should be 0.
737 if (Lsb == Msb)
738 Lsb = 0;
739 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
740 CurDAG->getTargetConstant(Lsb, DL, VT),
741 CurDAG->getTargetConstant(Msb, DL, VT));
742 };
743
744 SDLoc DL(Node);
745 MVT VT = Node->getSimpleValueType(0);
746 const unsigned RightShAmt = N1C->getZExtValue();
747
748 // Transform (sra (shl X, C1) C2) with C1 > C2
749 // -> (NDS.BFOS X, lsb, msb)
750 if (N0.getOpcode() == ISD::SHL) {
751 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
752 if (!N01C)
753 return false;
754
755 const unsigned LeftShAmt = N01C->getZExtValue();
756 // Make sure that this is a bitfield insertion (i.e., the shift-right
757 // amount should be less than the left-shift).
758 if (LeftShAmt <= RightShAmt)
759 return false;
760
761 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
762 const unsigned Msb = MsbPlusOne - 1;
763 const unsigned Lsb = LeftShAmt - RightShAmt;
764
765 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
766 ReplaceNode(Node, Sbi);
767 return true;
768 }
769
770 return false;
771}
772
773bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
774 const SDLoc &DL, MVT VT,
775 SDValue X, unsigned Msb,
776 unsigned Lsb) {
777 unsigned Opc;
778
779 if (Subtarget->hasVendorXTHeadBb()) {
780 Opc = RISCV::TH_EXTU;
781 } else if (Subtarget->hasVendorXAndesPerf()) {
782 Opc = RISCV::NDS_BFOZ;
783 } else if (Subtarget->hasVendorXqcibm()) {
784 Opc = RISCV::QC_EXTU;
785 // QC.EXTU X, width, shamt
786 // shamt is the same as Lsb
787 // width is the number of bits to extract from the Lsb
788 Msb = Msb - Lsb + 1;
789 } else {
790 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
791 return false;
792 }
793
794 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
795 CurDAG->getTargetConstant(Msb, DL, VT),
796 CurDAG->getTargetConstant(Lsb, DL, VT));
797 ReplaceNode(Node, Ube);
798 return true;
799}
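// Editor's note (illustrative addition, not part of the original file):
// extracting bits [15:8] (Msb = 15, Lsb = 8) through the helper above selects
// TH.EXTU X, 15, 8 with XTHeadBb; with Xqcibm the Msb operand is first
// rewritten to the field width (15 - 8 + 1 = 8), giving QC.EXTU X, 8, 8.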
800
801bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
802 const SDLoc &DL, MVT VT,
803 SDValue X, unsigned Msb,
804 unsigned Lsb) {
805 // Only supported with XAndesPerf at the moment.
806 if (!Subtarget->hasVendorXAndesPerf())
807 return false;
808
809 unsigned Opc = RISCV::NDS_BFOZ;
810
811 // If the Lsb is equal to the Msb, then the Lsb should be 0.
812 if (Lsb == Msb)
813 Lsb = 0;
814 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
815 CurDAG->getTargetConstant(Lsb, DL, VT),
816 CurDAG->getTargetConstant(Msb, DL, VT));
817 ReplaceNode(Node, Ubi);
818 return true;
819}
820
821bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
822 // Target does not support indexed loads.
823 if (!Subtarget->hasVendorXTHeadMemIdx())
824 return false;
825
826 const LoadSDNode *Ld = cast<LoadSDNode>(Node);
827 ISD::MemIndexedMode AM = Ld->getAddressingMode();
828 if (AM == ISD::UNINDEXED)
829 return false;
830
831 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
832 if (!C)
833 return false;
834
835 EVT LoadVT = Ld->getMemoryVT();
836 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
837 "Unexpected addressing mode");
838 bool IsPre = AM == ISD::PRE_INC;
839 bool IsPost = AM == ISD::POST_INC;
840 int64_t Offset = C->getSExtValue();
841
842 // The constants that can be encoded in the THeadMemIdx instructions
843 // are of the form (sign_extend(imm5) << imm2).
844 unsigned Shift;
845 for (Shift = 0; Shift < 4; Shift++)
846 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
847 break;
848
849 // Constant cannot be encoded.
850 if (Shift == 4)
851 return false;
852
853 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
854 unsigned Opcode;
855 if (LoadVT == MVT::i8 && IsPre)
856 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
857 else if (LoadVT == MVT::i8 && IsPost)
858 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
859 else if (LoadVT == MVT::i16 && IsPre)
860 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
861 else if (LoadVT == MVT::i16 && IsPost)
862 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
863 else if (LoadVT == MVT::i32 && IsPre)
864 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
865 else if (LoadVT == MVT::i32 && IsPost)
866 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
867 else if (LoadVT == MVT::i64 && IsPre)
868 Opcode = RISCV::TH_LDIB;
869 else if (LoadVT == MVT::i64 && IsPost)
870 Opcode = RISCV::TH_LDIA;
871 else
872 return false;
873
874 EVT Ty = Ld->getOffset().getValueType();
875 SDValue Ops[] = {
876 Ld->getBasePtr(),
877 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
878 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
879 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
880 Ld->getValueType(1), MVT::Other, Ops);
881
882 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
883 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
884
885 ReplaceNode(Node, New);
886
887 return true;
888}
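// Editor's note (illustrative addition, not part of the original file): the
// offset-encoding loop above splits the increment into (simm5 << imm2). A
// post-increment i32 load with offset 48 encodes as 12 << 2 (Shift = 2), so
// it selects the TH.LWIA/TH.LWUIA pseudo with immediates 12 and 2; an offset
// such as 49 has no such split, and the function returns false.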
889
890static Register getTileReg(uint64_t TileNum) {
891 assert(TileNum <= 15 && "Invalid tile number");
892 return RISCV::T0 + TileNum;
893}
894
895void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
896 if (!Subtarget->hasVInstructions())
897 return;
898
899 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
900
901 SDLoc DL(Node);
902 unsigned IntNo = Node->getConstantOperandVal(1);
903
904 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
905 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
906 "Unexpected vsetvli intrinsic");
907
908 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
909 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
910 SDValue SEWOp =
911 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
912 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
913 Node->getOperand(4), Node->getOperand(5),
914 Node->getOperand(8), SEWOp,
915 Node->getOperand(0)};
916
917 unsigned Opcode;
918 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
919 switch (LMulSDNode->getSExtValue()) {
920 case 5:
921 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
922 : RISCV::PseudoSF_VC_I_SE_MF8;
923 break;
924 case 6:
925 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
926 : RISCV::PseudoSF_VC_I_SE_MF4;
927 break;
928 case 7:
929 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
930 : RISCV::PseudoSF_VC_I_SE_MF2;
931 break;
932 case 0:
933 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
934 : RISCV::PseudoSF_VC_I_SE_M1;
935 break;
936 case 1:
937 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
938 : RISCV::PseudoSF_VC_I_SE_M2;
939 break;
940 case 2:
941 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
942 : RISCV::PseudoSF_VC_I_SE_M4;
943 break;
944 case 3:
945 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
946 : RISCV::PseudoSF_VC_I_SE_M8;
947 break;
948 }
949
950 ReplaceNode(Node, CurDAG->getMachineNode(
951 Opcode, DL, Node->getSimpleValueType(0), Operands));
952}
953
954static unsigned getSegInstNF(unsigned Intrinsic) {
955#define INST_NF_CASE(NAME, NF) \
956 case Intrinsic::riscv_##NAME##NF: \
957 return NF;
958#define INST_NF_CASE_MASK(NAME, NF) \
959 case Intrinsic::riscv_##NAME##NF##_mask: \
960 return NF;
961#define INST_NF_CASE_FF(NAME, NF) \
962 case Intrinsic::riscv_##NAME##NF##ff: \
963 return NF;
964#define INST_NF_CASE_FF_MASK(NAME, NF) \
965 case Intrinsic::riscv_##NAME##NF##ff_mask: \
966 return NF;
967#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
968 MACRO_NAME(NAME, 2) \
969 MACRO_NAME(NAME, 3) \
970 MACRO_NAME(NAME, 4) \
971 MACRO_NAME(NAME, 5) \
972 MACRO_NAME(NAME, 6) \
973 MACRO_NAME(NAME, 7) \
974 MACRO_NAME(NAME, 8)
975#define INST_ALL_NF_CASE(NAME) \
976 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
977 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
978#define INST_ALL_NF_CASE_WITH_FF(NAME) \
979 INST_ALL_NF_CASE(NAME) \
980 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
981 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
982 switch (Intrinsic) {
983 default:
984 llvm_unreachable("Unexpected segment load/store intrinsic");
985 INST_ALL_NF_CASE_WITH_FF(vlseg)
986 INST_ALL_NF_CASE(vlsseg)
987 INST_ALL_NF_CASE(vloxseg)
988 INST_ALL_NF_CASE(vluxseg)
989 INST_ALL_NF_CASE(vsseg)
990 INST_ALL_NF_CASE(vssseg)
991 INST_ALL_NF_CASE(vsoxseg)
992 INST_ALL_NF_CASE(vsuxseg)
993 }
994}
995
996static bool isApplicableToPLI(int Val) {
997 // Check if the immediate is packed i8 or i10
998 int16_t Bit31To16 = Val >> 16;
999 int16_t Bit15To0 = Val;
1000 int8_t Bit15To8 = Bit15To0 >> 8;
1001 int8_t Bit7To0 = Val;
1002 if (Bit31To16 != Bit15To0)
1003 return false;
1004
1005 return isInt<10>(Bit31To16) || Bit15To8 == Bit7To0;
1006}
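// Editor's note (illustrative addition, not part of the original file):
// values accepted by isApplicableToPLI above:
//   0x01200120 -> both halfwords are 0x0120, which fits in 10 bits -> PLI_H
//   0x7f7f7f7f -> all four bytes are 0x7f                          -> PLI_B
//   0x00120034 -> the two halfwords differ, so it is rejected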
1007
1008void RISCVDAGToDAGISel::Select(SDNode *Node) {
1009 // If we have a custom node, we have already selected.
1010 if (Node->isMachineOpcode()) {
1011 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
1012 Node->setNodeId(-1);
1013 return;
1014 }
1015
1016 // Instruction Selection not handled by the auto-generated tablegen selection
1017 // should be handled here.
1018 unsigned Opcode = Node->getOpcode();
1019 MVT XLenVT = Subtarget->getXLenVT();
1020 SDLoc DL(Node);
1021 MVT VT = Node->getSimpleValueType(0);
1022
1023 bool HasBitTest = Subtarget->hasBEXTILike();
1024
1025 switch (Opcode) {
1026 case ISD::Constant: {
1027 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
1028 auto *ConstNode = cast<ConstantSDNode>(Node);
1029 if (ConstNode->isZero()) {
1030 SDValue New =
1031 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1032 ReplaceNode(Node, New.getNode());
1033 return;
1034 }
1035 int64_t Imm = ConstNode->getSExtValue();
1036 // If only the lower 8 bits are used, try to convert this to a simm6 by
1037 // sign-extending bit 7. This is neutral without the C extension, and
1038 // allows C.LI to be used if C is present.
1039 if (!isInt<8>(Imm) && isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) &&
1040 hasAllBUsers(Node))
1041 Imm = SignExtend64<8>(Imm);
1042 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1043 // by sign extending bit 15.
1044 else if (!isInt<16>(Imm) && isUInt<16>(Imm) &&
1045 hasAllHUsers(Node))
1046 Imm = SignExtend64<16>(Imm);
1047 // If the upper 32-bits are not used try to convert this into a simm32 by
1048 // sign extending bit 32.
1049 else if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1050 Imm = SignExtend64<32>(Imm);
1051
1052 if (Subtarget->enablePExtCodeGen() && isApplicableToPLI(Imm) &&
1053 hasAllWUsers(Node)) {
1054 // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers, we
1055 // can simply copy the lower 32 bits into the upper 32 bits so the value
1056 // can be rematerialized as PLI_B or PLI_H.
1057 Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
1058 }
1059
1060 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1061 return;
1062 }
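// Editor's note (illustrative addition, not part of the original file):
// example of the narrowing in the ISD::Constant case above. For Imm = 0xF0
// whose users only read the low 8 bits, sign-extending bit 7 gives -16, which
// fits in 6 bits, so a 2-byte c.li can be used (when the C extension is
// present) instead of an uncompressible addi of 240.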
1063 case ISD::ConstantFP: {
1064 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1065
1066 bool Is64Bit = Subtarget->is64Bit();
1067 bool HasZdinx = Subtarget->hasStdExtZdinx();
1068
1069 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1070 SDValue Imm;
1071 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1072 // create an integer immediate.
1073 if (APF.isPosZero() || NegZeroF64) {
1074 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1075 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1076 else
1077 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1078 } else {
1079 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1080 *Subtarget);
1081 }
1082
1083 unsigned Opc;
1084 switch (VT.SimpleTy) {
1085 default:
1086 llvm_unreachable("Unexpected size");
1087 case MVT::bf16:
1088 assert(Subtarget->hasStdExtZfbfmin());
1089 Opc = RISCV::FMV_H_X;
1090 break;
1091 case MVT::f16:
1092 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1093 break;
1094 case MVT::f32:
1095 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1096 break;
1097 case MVT::f64:
1098 // For RV32, we can't move from a GPR, we need to convert instead. This
1099 // should only happen for +0.0 and -0.0.
1100 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1101 if (HasZdinx)
1102 Opc = RISCV::COPY;
1103 else
1104 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1105 break;
1106 }
1107
1108 SDNode *Res;
1109 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1110 Res =
1111 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1112 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1113 Res =
1114 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1115 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1116 Res = CurDAG->getMachineNode(
1117 Opc, DL, VT, Imm,
1118 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1119 else
1120 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1121
1122 // For f64 -0.0, we need to insert a fneg.d idiom.
1123 if (NegZeroF64) {
1124 Opc = RISCV::FSGNJN_D;
1125 if (HasZdinx)
1126 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1127 Res =
1128 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1129 }
1130
1131 ReplaceNode(Node, Res);
1132 return;
1133 }
1134 case RISCVISD::BuildGPRPair:
1135 case RISCVISD::BuildPairF64: {
1136 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1137 break;
1138
1139 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1140 "BuildPairF64 only handled here on rv32i_zdinx");
1141
1142 SDValue Ops[] = {
1143 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1144 Node->getOperand(0),
1145 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1146 Node->getOperand(1),
1147 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1148
1149 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
1150 ReplaceNode(Node, N);
1151 return;
1152 }
1153 case RISCVISD::SplitGPRPair:
1154 case RISCVISD::SplitF64: {
1155 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1156 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1157 "SplitF64 only handled here on rv32i_zdinx");
1158
1159 if (!SDValue(Node, 0).use_empty()) {
1160 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1161 Node->getValueType(0),
1162 Node->getOperand(0));
1163 ReplaceUses(SDValue(Node, 0), Lo);
1164 }
1165
1166 if (!SDValue(Node, 1).use_empty()) {
1167 SDValue Hi = CurDAG->getTargetExtractSubreg(
1168 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1169 ReplaceUses(SDValue(Node, 1), Hi);
1170 }
1171
1172 CurDAG->RemoveDeadNode(Node);
1173 return;
1174 }
1175
1176 assert(Opcode != RISCVISD::SplitGPRPair &&
1177 "SplitGPRPair should already be handled");
1178
1179 if (!Subtarget->hasStdExtZfa())
1180 break;
1181 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1182 "Unexpected subtarget");
1183
1184 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1185 if (!SDValue(Node, 0).use_empty()) {
1186 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1187 Node->getOperand(0));
1188 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1189 }
1190 if (!SDValue(Node, 1).use_empty()) {
1191 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1192 Node->getOperand(0));
1193 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1194 }
1195
1196 CurDAG->RemoveDeadNode(Node);
1197 return;
1198 }
1199 case ISD::SHL: {
1200 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1201 if (!N1C)
1202 break;
1203 SDValue N0 = Node->getOperand(0);
1204 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1205 !isa<ConstantSDNode>(N0.getOperand(1)))
1206 break;
1207 unsigned ShAmt = N1C->getZExtValue();
1208 uint64_t Mask = N0.getConstantOperandVal(1);
1209
1210 if (isShiftedMask_64(Mask)) {
1211 unsigned XLen = Subtarget->getXLen();
1212 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1213 unsigned TrailingZeros = llvm::countr_zero(Mask);
1214 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1215 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1216 // where C2 has 32 leading zeros and C3 trailing zeros.
1217 SDNode *SRLIW = CurDAG->getMachineNode(
1218 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1219 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1220 SDNode *SLLI = CurDAG->getMachineNode(
1221 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1222 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1223 ReplaceNode(Node, SLLI);
1224 return;
1225 }
1226 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1227 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1228 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1229 // where C2 has C4 leading zeros and no trailing zeros.
1230 // This is profitable if the "and" was to be lowered to
1231 // (srli (slli X, C4), C4) and not (andi X, C2).
1232 // For "LeadingZeros == 32":
1233 // - with Zba it's just (slli.uw X, C)
1234 // - without Zba a tablegen pattern applies the very same
1235 // transform as we would have done here
1236 SDNode *SLLI = CurDAG->getMachineNode(
1237 RISCV::SLLI, DL, VT, N0.getOperand(0),
1238 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1239 SDNode *SRLI = CurDAG->getMachineNode(
1240 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1241 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1242 ReplaceNode(Node, SRLI);
1243 return;
1244 }
1245 }
1246 break;
1247 }
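// Editor's note (illustrative addition, not part of the original file): the
// first SHL rewrite above, on RV64. For (shl (and X, 0xFFFFFFF0), 2) the mask
// has 32 leading and 4 trailing zeros, so the pair becomes
//   srliw t0, X, 4
//   slli  t0, t0, 6
// and the 0xFFFFFFF0 constant never has to be materialized.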
1248 case ISD::SRL: {
1249 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1250 if (!N1C)
1251 break;
1252 SDValue N0 = Node->getOperand(0);
1253 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1254 break;
1255 unsigned ShAmt = N1C->getZExtValue();
1256 uint64_t Mask = N0.getConstantOperandVal(1);
1257
1258 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1259 // 32 leading zeros and C3 trailing zeros.
1260 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1261 unsigned XLen = Subtarget->getXLen();
1262 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1263 unsigned TrailingZeros = llvm::countr_zero(Mask);
1264 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1265 SDNode *SRLIW = CurDAG->getMachineNode(
1266 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1267 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1268 SDNode *SLLI = CurDAG->getMachineNode(
1269 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1270 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1271 ReplaceNode(Node, SLLI);
1272 return;
1273 }
1274 }
1275
1276 // Optimize (srl (and X, C2), C) ->
1277 // (srli (slli X, XLen-C3), (XLen-C3) + C)
1278 // Where C2 is a mask with C3 trailing ones.
1279 // Taking into account that the C2 may have had lower bits unset by
1280 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1281 // This pattern occurs when type legalizing right shifts for types with
1282 // less than XLen bits.
1283 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1284 if (!isMask_64(Mask))
1285 break;
1286 unsigned TrailingOnes = llvm::countr_one(Mask);
1287 if (ShAmt >= TrailingOnes)
1288 break;
1289 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1290 if (TrailingOnes == 32) {
1291 SDNode *SRLI = CurDAG->getMachineNode(
1292 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1293 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1294 ReplaceNode(Node, SRLI);
1295 return;
1296 }
1297
1298 // Only do the remaining transforms if the AND has one use.
1299 if (!N0.hasOneUse())
1300 break;
1301
1302 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1303 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1304 SDNode *BEXTI = CurDAG->getMachineNode(
1305 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1306 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1307 ReplaceNode(Node, BEXTI);
1308 return;
1309 }
1310
1311 const unsigned Msb = TrailingOnes - 1;
1312 const unsigned Lsb = ShAmt;
1313 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
1314 return;
1315
1316 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1317 SDNode *SLLI =
1318 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1319 CurDAG->getTargetConstant(LShAmt, DL, VT));
1320 SDNode *SRLI = CurDAG->getMachineNode(
1321 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1322 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1323 ReplaceNode(Node, SRLI);
1324 return;
1325 }
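// Editor's note (illustrative addition, not part of the original file): the
// generic tail of the SRL case above, on RV64 without Zbs or a vendor
// bitfield-extract extension. For (srl (and X, 0xFFFF), 8) the mask has 16
// trailing ones, so the pair is rewritten to
//   slli t0, X, 48
//   srli t0, t0, 56
// which yields bits [15:8] of X zero-extended, again without materializing
// the mask.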
1326 case ISD::SRA: {
1327 if (trySignedBitfieldExtract(Node))
1328 return;
1329
1330 if (trySignedBitfieldInsertInSign(Node))
1331 return;
1332
1333 // Optimize (sra (sext_inreg X, i16), C) ->
1334 // (srai (slli X, XLen-16), (XLen-16) + C)
1335 // And (sra (sext_inreg X, i8), C) ->
1336 // (srai (slli X, XLen-8), (XLen-8) + C)
1337 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1338 // This transform matches the code we get without Zbb. The shifts are more
1339 // compressible, and this can help expose CSE opportunities in the sdiv by
1340 // constant optimization.
1341 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1342 if (!N1C)
1343 break;
1344 SDValue N0 = Node->getOperand(0);
1345 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1346 break;
1347 unsigned ShAmt = N1C->getZExtValue();
1348 unsigned ExtSize =
1349 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1350 // ExtSize of 32 should use sraiw via tablegen pattern.
1351 if (ExtSize >= 32 || ShAmt >= ExtSize)
1352 break;
1353 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1354 SDNode *SLLI =
1355 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1356 CurDAG->getTargetConstant(LShAmt, DL, VT));
1357 SDNode *SRAI = CurDAG->getMachineNode(
1358 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1359 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1360 ReplaceNode(Node, SRAI);
1361 return;
1362 }
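// Editor's note (illustrative addition, not part of the original file): the
// sext_inreg rewrite above, on RV64. For (sra (sext_inreg X, i8), 3),
// ExtSize = 8, so the node becomes
//   slli t0, X, 56
//   srai t0, t0, 59
// reproducing the sign-extend-then-shift with two compressible shifts.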
1363 case ISD::OR: {
1364 if (tryShrinkShlLogicImm(Node))
1365 return;
1366
1367 break;
1368 }
1369 case ISD::XOR:
1370 if (tryShrinkShlLogicImm(Node))
1371 return;
1372
1373 break;
1374 case ISD::AND: {
1375 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1376 if (!N1C)
1377 break;
1378
1379 SDValue N0 = Node->getOperand(0);
1380
1381 bool LeftShift = N0.getOpcode() == ISD::SHL;
1382 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1383 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1384 if (!C)
1385 break;
1386 unsigned C2 = C->getZExtValue();
1387 unsigned XLen = Subtarget->getXLen();
1388 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1389
1390 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1391 // shift pair might offer more compression opportunities.
1392 // TODO: We could check for C extension here, but we don't have many lit
1393 // tests with the C extension enabled so not checking gets better
1394 // coverage.
1395 // TODO: What if ANDI is faster than the shift?
1396 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1397
1398 uint64_t C1 = N1C->getZExtValue();
1399
1400 // Clear irrelevant bits in the mask.
1401 if (LeftShift)
1402 C1 &= maskTrailingZeros<uint64_t>(C2);
1403 else
1404 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1405
1406 // Some transforms should only be done if the shift has a single use or
1407 // the AND would become (srli (slli X, 32), 32)
1408 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1409
1410 SDValue X = N0.getOperand(0);
1411
1412 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1413 // with c3 leading zeros.
1414 if (!LeftShift && isMask_64(C1)) {
1415 unsigned Leading = XLen - llvm::bit_width(C1);
1416 if (C2 < Leading) {
1417 // If the number of leading zeros is C2+32 this can be SRLIW.
1418 if (C2 + 32 == Leading) {
1419 SDNode *SRLIW = CurDAG->getMachineNode(
1420 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1421 ReplaceNode(Node, SRLIW);
1422 return;
1423 }
1424
1425 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1426 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1427 //
1428 // This pattern occurs when (i32 (srl (sra Y, 31), c3 - 32)) is type
1429 // legalized and goes through DAG combine.
1430 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1431 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1432 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1433 SDNode *SRAIW =
1434 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1435 CurDAG->getTargetConstant(31, DL, VT));
1436 SDNode *SRLIW = CurDAG->getMachineNode(
1437 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1438 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1439 ReplaceNode(Node, SRLIW);
1440 return;
1441 }
1442
1443 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1444 // available.
1445 // Transform (and (srl x, C2), C1)
1446 // -> (<bfextract> x, msb, lsb)
1447 //
1448 // Make sure to keep this below the SRLIW cases, as we always want to
1449 // prefer the more common instruction.
1450 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1451 const unsigned Lsb = C2;
1452 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1453 return;
1454
1455 // (srli (slli x, c3-c2), c3).
1456 // Skip if we could use (zext.w (sraiw X, C2)).
1457 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1458 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1459 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1460 // Also Skip if we can use bexti or th.tst.
1461 Skip |= HasBitTest && Leading == XLen - 1;
1462 if (OneUseOrZExtW && !Skip) {
1463 SDNode *SLLI = CurDAG->getMachineNode(
1464 RISCV::SLLI, DL, VT, X,
1465 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1466 SDNode *SRLI = CurDAG->getMachineNode(
1467 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1468 CurDAG->getTargetConstant(Leading, DL, VT));
1469 ReplaceNode(Node, SRLI);
1470 return;
1471 }
1472 }
1473 }
1474
1475 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1476 // shifted by c2 bits with c3 leading zeros.
1477 if (LeftShift && isShiftedMask_64(C1)) {
1478 unsigned Leading = XLen - llvm::bit_width(C1);
1479
1480 if (C2 + Leading < XLen &&
1481 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1482 // Use slli.uw when possible.
1483 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1484 SDNode *SLLI_UW =
1485 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1486 CurDAG->getTargetConstant(C2, DL, VT));
1487 ReplaceNode(Node, SLLI_UW);
1488 return;
1489 }
1490
1491 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1492 // available.
1493 // Transform (and (shl x, c2), c1)
1494 // -> (<bfinsert> x, msb, lsb)
1495 // e.g.
1496 // (and (shl x, 12), 0x00fff000)
1497 // If XLen = 32 and C2 = 12, then
1498 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1499 const unsigned Msb = XLen - Leading - 1;
1500 const unsigned Lsb = C2;
1501 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1502 return;
1503
1504 if (OneUseOrZExtW && !IsCANDI) {
1505 // (packh x0, X)
1506 if (Subtarget->hasStdExtZbkb() && C1 == 0xff00 && C2 == 8) {
1507 SDNode *PACKH = CurDAG->getMachineNode(
1508 RISCV::PACKH, DL, VT,
1509 CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()), X);
1510 ReplaceNode(Node, PACKH);
1511 return;
1512 }
1513 // (srli (slli x, c2+c3), c3)
1514 SDNode *SLLI = CurDAG->getMachineNode(
1515 RISCV::SLLI, DL, VT, X,
1516 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1517 SDNode *SRLI = CurDAG->getMachineNode(
1518 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1519 CurDAG->getTargetConstant(Leading, DL, VT));
1520 ReplaceNode(Node, SRLI);
1521 return;
1522 }
1523 }
1524 }
1525
1526 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1527 // shifted mask with c2 leading zeros and c3 trailing zeros.
1528 if (!LeftShift && isShiftedMask_64(C1)) {
1529 unsigned Leading = XLen - llvm::bit_width(C1);
1530 unsigned Trailing = llvm::countr_zero(C1);
1531 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1532 !IsCANDI) {
1533 unsigned SrliOpc = RISCV::SRLI;
1534 // If the input is zexti32 we should use SRLIW.
1535 if (X.getOpcode() == ISD::AND &&
1536 isa<ConstantSDNode>(X.getOperand(1)) &&
1537 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1538 SrliOpc = RISCV::SRLIW;
1539 X = X.getOperand(0);
1540 }
1541 SDNode *SRLI = CurDAG->getMachineNode(
1542 SrliOpc, DL, VT, X,
1543 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1544 SDNode *SLLI = CurDAG->getMachineNode(
1545 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1546 CurDAG->getTargetConstant(Trailing, DL, VT));
1547 ReplaceNode(Node, SLLI);
1548 return;
1549 }
1550 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1551 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1552 OneUseOrZExtW && !IsCANDI) {
1553 SDNode *SRLIW = CurDAG->getMachineNode(
1554 RISCV::SRLIW, DL, VT, X,
1555 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1556 SDNode *SLLI = CurDAG->getMachineNode(
1557 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1558 CurDAG->getTargetConstant(Trailing, DL, VT));
1559 ReplaceNode(Node, SLLI);
1560 return;
1561 }
1562 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1563 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1564 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1565 SDNode *SRLI = CurDAG->getMachineNode(
1566 RISCV::SRLI, DL, VT, X,
1567 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1568 SDNode *SLLI_UW = CurDAG->getMachineNode(
1569 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1570 CurDAG->getTargetConstant(Trailing, DL, VT));
1571 ReplaceNode(Node, SLLI_UW);
1572 return;
1573 }
1574 }
1575
1576 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1577 // shifted mask with no leading zeros and c3 trailing zeros.
1578 if (LeftShift && isShiftedMask_64(C1)) {
1579 unsigned Leading = XLen - llvm::bit_width(C1);
1580 unsigned Trailing = llvm::countr_zero(C1);
1581 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1582 SDNode *SRLI = CurDAG->getMachineNode(
1583 RISCV::SRLI, DL, VT, X,
1584 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1585 SDNode *SLLI = CurDAG->getMachineNode(
1586 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1587 CurDAG->getTargetConstant(Trailing, DL, VT));
1588 ReplaceNode(Node, SLLI);
1589 return;
1590 }
1591 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1592 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1593 SDNode *SRLIW = CurDAG->getMachineNode(
1594 RISCV::SRLIW, DL, VT, X,
1595 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1596 SDNode *SLLI = CurDAG->getMachineNode(
1597 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1598 CurDAG->getTargetConstant(Trailing, DL, VT));
1599 ReplaceNode(Node, SLLI);
1600 return;
1601 }
1602
1603 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1604 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1605 Subtarget->hasStdExtZba()) {
1606 SDNode *SRLI = CurDAG->getMachineNode(
1607 RISCV::SRLI, DL, VT, X,
1608 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1609 SDNode *SLLI_UW = CurDAG->getMachineNode(
1610 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1611 CurDAG->getTargetConstant(Trailing, DL, VT));
1612 ReplaceNode(Node, SLLI_UW);
1613 return;
1614 }
1615 }
1616 }
1617
1618 const uint64_t C1 = N1C->getZExtValue();
1619
1620 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1621 N0.hasOneUse()) {
1622 unsigned C2 = N0.getConstantOperandVal(1);
1623 unsigned XLen = Subtarget->getXLen();
1624 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1625
1626 SDValue X = N0.getOperand(0);
1627
1628 // Prefer SRAIW + ANDI when possible.
1629 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1630 X.getOpcode() == ISD::SHL &&
1631 isa<ConstantSDNode>(X.getOperand(1)) &&
1632 X.getConstantOperandVal(1) == 32;
1633 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1634 // mask with c3 leading zeros and c2 is larger than c3.
1635 if (isMask_64(C1) && !Skip) {
1636 unsigned Leading = XLen - llvm::bit_width(C1);
1637 if (C2 > Leading) {
1638 SDNode *SRAI = CurDAG->getMachineNode(
1639 RISCV::SRAI, DL, VT, X,
1640 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1641 SDNode *SRLI = CurDAG->getMachineNode(
1642 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1643 CurDAG->getTargetConstant(Leading, DL, VT));
1644 ReplaceNode(Node, SRLI);
1645 return;
1646 }
1647 }
1648
1649 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1650 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1651 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1652 if (isShiftedMask_64(C1) && !Skip) {
1653 unsigned Leading = XLen - llvm::bit_width(C1);
1654 unsigned Trailing = llvm::countr_zero(C1);
1655 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1656 SDNode *SRAI = CurDAG->getMachineNode(
1657 RISCV::SRAI, DL, VT, N0.getOperand(0),
1658 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1659 SDNode *SRLI = CurDAG->getMachineNode(
1660 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1661 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1662 SDNode *SLLI = CurDAG->getMachineNode(
1663 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1664 CurDAG->getTargetConstant(Trailing, DL, VT));
1665 ReplaceNode(Node, SLLI);
1666 return;
1667 }
1668 }
1669 }
1670
1671 // If C1 masks off the upper bits only (but can't be formed as an
1672 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1673 // available.
1674 // Transform (and x, C1)
1675 // -> (<bfextract> x, msb, lsb)
1676 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1677 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1678 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1679 const unsigned Msb = llvm::bit_width(C1) - 1;
1680 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1681 return;
1682 }
1683
1683
1684 if (tryShrinkShlLogicImm(Node))
1685 return;
1686
1687 break;
1688 }
1689 case ISD::MUL: {
1690 // Special case for calculating (mul (and X, C2), C1) where the full product
1691 // fits in XLen bits. We can shift X left by the number of leading zeros in
1692 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1693 // product has XLen trailing zeros, putting it in the output of MULHU. This
1694 // can avoid materializing a constant in a register for C2.
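// Illustrative example (added commentary), assuming XLen=64:
// (mul (and X, 0xFFFFF), 0x12345) can become (mulhu (slli X, 44), 0x12345 << 20):
// the 20 mask bits are pushed to the top of the register, so the full product
// lands in the high XLen bits returned by MULHU and the AND mask never needs
// to be materialized in a register.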
1695
1696 // RHS should be a constant.
1697 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1698 if (!N1C || !N1C->hasOneUse())
1699 break;
1700
1701 // LHS should be an AND with constant.
1702 SDValue N0 = Node->getOperand(0);
1703 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1704 break;
1705
1706 uint64_t C2 = N0.getConstantOperandVal(1);
1707
1708 // Constant should be a mask.
1709 if (!isMask_64(C2))
1710 break;
1711
1712 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1713 // multiple users or the constant is a simm12. This prevents inserting a
1714 // shift while still having uses of the AND/ZEXT. Shifting a simm12 will likely
1715 // make it more costly to materialize. Otherwise, using a SLLI might allow
1716 // it to be compressed.
1717 bool IsANDIOrZExt =
1718 isInt<12>(C2) ||
1719 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1720 // With XTHeadBb, we can use TH.EXTU.
1721 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1722 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1723 break;
1724 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1725 // the constant is a simm32.
1726 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1727 // With XTHeadBb, we can use TH.EXTU.
1728 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1729 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1730 break;
1731
1732 // We need to shift left the AND input and C1 by a total of XLen bits.
1733
1734 // How far left do we need to shift the AND input?
1735 unsigned XLen = Subtarget->getXLen();
1736 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1737
1738 // The constant gets shifted by the remaining amount unless that would
1739 // shift bits out.
1740 uint64_t C1 = N1C->getZExtValue();
1741 unsigned ConstantShift = XLen - LeadingZeros;
1742 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1743 break;
1744
1745 uint64_t ShiftedC1 = C1 << ConstantShift;
1746 // If this is RV32, we need to sign extend the constant.
1747 if (XLen == 32)
1748 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1749
1750 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1751 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1752 SDNode *SLLI =
1753 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1754 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1755 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1756 SDValue(SLLI, 0), SDValue(Imm, 0));
1757 ReplaceNode(Node, MULHU);
1758 return;
1759 }
1760 case ISD::LOAD: {
1761 if (tryIndexedLoad(Node))
1762 return;
1763
1764 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1765 // We match post-incrementing loads here.
1766 LoadSDNode *Load = cast<LoadSDNode>(Node);
1767 if (Load->getAddressingMode() != ISD::POST_INC)
1768 break;
1769
1770 SDValue Chain = Node->getOperand(0);
1771 SDValue Base = Node->getOperand(1);
1772 SDValue Offset = Node->getOperand(2);
1773
1774 bool Simm12 = false;
1775 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1776
1777 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1778 int ConstantVal = ConstantOffset->getSExtValue();
1779 Simm12 = isInt<12>(ConstantVal);
1780 if (Simm12)
1781 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1782 Offset.getValueType());
1783 }
1784
1785 unsigned Opcode = 0;
1786 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1787 case MVT::i8:
1788 if (Simm12 && SignExtend)
1789 Opcode = RISCV::CV_LB_ri_inc;
1790 else if (Simm12 && !SignExtend)
1791 Opcode = RISCV::CV_LBU_ri_inc;
1792 else if (!Simm12 && SignExtend)
1793 Opcode = RISCV::CV_LB_rr_inc;
1794 else
1795 Opcode = RISCV::CV_LBU_rr_inc;
1796 break;
1797 case MVT::i16:
1798 if (Simm12 && SignExtend)
1799 Opcode = RISCV::CV_LH_ri_inc;
1800 else if (Simm12 && !SignExtend)
1801 Opcode = RISCV::CV_LHU_ri_inc;
1802 else if (!Simm12 && SignExtend)
1803 Opcode = RISCV::CV_LH_rr_inc;
1804 else
1805 Opcode = RISCV::CV_LHU_rr_inc;
1806 break;
1807 case MVT::i32:
1808 if (Simm12)
1809 Opcode = RISCV::CV_LW_ri_inc;
1810 else
1811 Opcode = RISCV::CV_LW_rr_inc;
1812 break;
1813 default:
1814 break;
1815 }
1816 if (!Opcode)
1817 break;
1818
1819 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1820 Chain.getSimpleValueType(), Base,
1821 Offset, Chain));
1822 return;
1823 }
1824 break;
1825 }
1826 case RISCVISD::LD_RV32: {
1827 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
1828
1828
1829 SDValue Base, Offset;
1830 SDValue Chain = Node->getOperand(0);
1831 SDValue Addr = Node->getOperand(1);
1832 SelectAddrRegImm(Addr, Base, Offset);
1833
1834 SDValue Ops[] = {Base, Offset, Chain};
1835 MachineSDNode *New = CurDAG->getMachineNode(
1836 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
1837 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1838 MVT::i32, SDValue(New, 0));
1839 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
1840 MVT::i32, SDValue(New, 0));
1841 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1842 ReplaceUses(SDValue(Node, 0), Lo);
1843 ReplaceUses(SDValue(Node, 1), Hi);
1844 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
1845 CurDAG->RemoveDeadNode(Node);
1846 return;
1847 }
1848 case RISCVISD::SD_RV32: {
1849 SDValue Base, Offset;
1850 SDValue Chain = Node->getOperand(0);
1851 SDValue Addr = Node->getOperand(3);
1852 SelectAddrRegImm(Addr, Base, Offset);
1853
1854 SDValue Lo = Node->getOperand(1);
1855 SDValue Hi = Node->getOperand(2);
1856
1857 SDValue RegPair;
1858 // Peephole to use X0_Pair for storing zero.
1859 if (isNullConstant(Lo) && isNullConstant(Hi)) {
1860 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
1861 } else {
1862 SDValue Ops[] = {
1863 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
1864 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
1865 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1866
1867 RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1868 MVT::Untyped, Ops),
1869 0);
1870 }
1871
1872 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
1873 {RegPair, Base, Offset, Chain});
1874 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1875 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
1876 CurDAG->RemoveDeadNode(Node);
1877 return;
1878 }
1879 case RISCVISD::PPACK_DH: {
1880 assert(Subtarget->enablePExtCodeGen() && Subtarget->isRV32());
1881
1882 SDValue Val0 = Node->getOperand(0);
1883 SDValue Val1 = Node->getOperand(1);
1884 SDValue Val2 = Node->getOperand(2);
1885 SDValue Val3 = Node->getOperand(3);
1886
1887 SDValue Ops[] = {
1888 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Val0,
1889 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Val2,
1890 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1891 SDValue RegPair0 =
1892 SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1893 MVT::Untyped, Ops),
1894 0);
1895 SDValue Ops1[] = {
1896 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Val1,
1897 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Val3,
1898 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1899 SDValue RegPair1 =
1900 SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1901 MVT::Untyped, Ops1),
1902 0);
1903
1904 MachineSDNode *PackDH = CurDAG->getMachineNode(
1905 RISCV::PPAIRE_DB, DL, MVT::Untyped, {RegPair0, RegPair1});
1906
1907 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1908 MVT::i32, SDValue(PackDH, 0));
1909 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
1910 MVT::i32, SDValue(PackDH, 0));
1911 ReplaceUses(SDValue(Node, 0), Lo);
1912 ReplaceUses(SDValue(Node, 1), Hi);
1913 CurDAG->RemoveDeadNode(Node);
1914 return;
1915 }
1916 case ISD::INTRINSIC_WO_CHAIN: {
1917 unsigned IntNo = Node->getConstantOperandVal(0);
1918 switch (IntNo) {
1919 // By default we do not custom select any intrinsic.
1920 default:
1921 break;
1922 case Intrinsic::riscv_vmsgeu:
1923 case Intrinsic::riscv_vmsge: {
1924 SDValue Src1 = Node->getOperand(1);
1925 SDValue Src2 = Node->getOperand(2);
1926 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1927 bool IsCmpConstant = false;
1928 bool IsCmpMinimum = false;
1929 // Only custom select scalar second operand.
1930 if (Src2.getValueType() != XLenVT)
1931 break;
1932 // Small constants are handled with patterns.
1933 int64_t CVal = 0;
1934 MVT Src1VT = Src1.getSimpleValueType();
1935 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1936 IsCmpConstant = true;
1937 CVal = C->getSExtValue();
1938 if (CVal >= -15 && CVal <= 16) {
1939 if (!IsUnsigned || CVal != 0)
1940 break;
1941 IsCmpMinimum = true;
1942 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1943 Src1VT.getScalarSizeInBits())
1944 .getSExtValue()) {
1945 IsCmpMinimum = true;
1946 }
1947 }
1948 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1949 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1950 default:
1951 llvm_unreachable("Unexpected LMUL!");
1952#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1953 case RISCVVType::lmulenum: \
1954 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1955 : RISCV::PseudoVMSLT_VX_##suffix; \
1956 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1957 : RISCV::PseudoVMSGT_VX_##suffix; \
1958 break;
1959 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1960 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1961 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1962 CASE_VMSLT_OPCODES(LMUL_1, M1)
1963 CASE_VMSLT_OPCODES(LMUL_2, M2)
1964 CASE_VMSLT_OPCODES(LMUL_4, M4)
1965 CASE_VMSLT_OPCODES(LMUL_8, M8)
1966#undef CASE_VMSLT_OPCODES
1967 }
1968 // Mask operations use the LMUL from the mask type.
1969 switch (RISCVTargetLowering::getLMUL(VT)) {
1970 default:
1971 llvm_unreachable("Unexpected LMUL!");
1972#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
1973 case RISCVVType::lmulenum: \
1974 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1975 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
1976 break;
1977 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1978 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1979 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1980 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1981 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1982 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1983 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1984#undef CASE_VMNAND_VMSET_OPCODES
1985 }
1986 SDValue SEW = CurDAG->getTargetConstant(
1987 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1988 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1989 SDValue VL;
1990 selectVLOp(Node->getOperand(3), VL);
1991
1992 // If vmsge(u) with minimum value, expand it to vmset.
1993 if (IsCmpMinimum) {
1994 ReplaceNode(Node,
1995 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
1996 return;
1997 }
1998
1999 if (IsCmpConstant) {
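// Added note: for a constant operand we can select vmsgt(u).vx with (imm - 1),
// since x >= imm is equivalent to x > imm - 1 once the minimum-value case has
// been handled above.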
2000 SDValue Imm =
2001 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2002
2003 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
2004 {Src1, Imm, VL, SEW}));
2005 return;
2006 }
2007
2008 // Expand to
2009 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
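// Added note: the V extension has no vmsge(u).vx encoding, so the general
// scalar-operand case is emulated as "not (va < x)" with vmslt(u) + vmnand.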
2010 SDValue Cmp = SDValue(
2011 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2012 0);
2013 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
2014 {Cmp, Cmp, VL, MaskSEW}));
2015 return;
2016 }
2017 case Intrinsic::riscv_vmsgeu_mask:
2018 case Intrinsic::riscv_vmsge_mask: {
2019 SDValue Src1 = Node->getOperand(2);
2020 SDValue Src2 = Node->getOperand(3);
2021 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
2022 bool IsCmpConstant = false;
2023 bool IsCmpMinimum = false;
2024 // Only custom select scalar second operand.
2025 if (Src2.getValueType() != XLenVT)
2026 break;
2027 // Small constants are handled with patterns.
2028 MVT Src1VT = Src1.getSimpleValueType();
2029 int64_t CVal = 0;
2030 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
2031 IsCmpConstant = true;
2032 CVal = C->getSExtValue();
2033 if (CVal >= -15 && CVal <= 16) {
2034 if (!IsUnsigned || CVal != 0)
2035 break;
2036 IsCmpMinimum = true;
2037 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
2038 Src1VT.getScalarSizeInBits())
2039 .getSExtValue()) {
2040 IsCmpMinimum = true;
2041 }
2042 }
2043 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
2044 VMOROpcode, VMSGTMaskOpcode;
2045 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
2046 default:
2047 llvm_unreachable("Unexpected LMUL!");
2048#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
2049 case RISCVVType::lmulenum: \
2050 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
2051 : RISCV::PseudoVMSLT_VX_##suffix; \
2052 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
2053 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
2054 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
2055 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
2056 break;
2057 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
2058 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
2059 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
2060 CASE_VMSLT_OPCODES(LMUL_1, M1)
2061 CASE_VMSLT_OPCODES(LMUL_2, M2)
2062 CASE_VMSLT_OPCODES(LMUL_4, M4)
2063 CASE_VMSLT_OPCODES(LMUL_8, M8)
2064#undef CASE_VMSLT_OPCODES
2065 }
2066 // Mask operations use the LMUL from the mask type.
2067 switch (RISCVTargetLowering::getLMUL(VT)) {
2068 default:
2069 llvm_unreachable("Unexpected LMUL!");
2070#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2071 case RISCVVType::lmulenum: \
2072 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2073 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2074 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2075 break;
2076 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2077 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2078 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2079 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
2080 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
2081 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
2082 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
2083#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2084 }
2085 SDValue SEW = CurDAG->getTargetConstant(
2086 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2087 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2088 SDValue VL;
2089 selectVLOp(Node->getOperand(5), VL);
2090 SDValue MaskedOff = Node->getOperand(1);
2091 SDValue Mask = Node->getOperand(4);
2092
2093 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2094 if (IsCmpMinimum) {
2095 // We don't need vmor if the MaskedOff and the Mask are the same
2096 // value.
2097 if (Mask == MaskedOff) {
2098 ReplaceUses(Node, Mask.getNode());
2099 return;
2100 }
2101 ReplaceNode(Node,
2102 CurDAG->getMachineNode(VMOROpcode, DL, VT,
2103 {Mask, MaskedOff, VL, MaskSEW}));
2104 return;
2105 }
2106
2107 // If the MaskedOff value and the Mask are the same value use
2108 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2109 // This avoids needing to copy v0 to vd before starting the next sequence.
2110 if (Mask == MaskedOff) {
2111 SDValue Cmp = SDValue(
2112 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2113 0);
2114 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2115 {Mask, Cmp, VL, MaskSEW}));
2116 return;
2117 }
2118
2119 SDValue PolicyOp =
2120 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2121
2122 if (IsCmpConstant) {
2123 SDValue Imm =
2124 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2125
2126 ReplaceNode(Node, CurDAG->getMachineNode(
2127 VMSGTMaskOpcode, DL, VT,
2128 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2129 return;
2130 }
2131
2132 // Otherwise use
2133 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2134 // The result is mask undisturbed.
2135 // We use the same instructions to emulate mask agnostic behavior, because
2136 // the agnostic result can be either undisturbed or all 1.
2137 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2138 {MaskedOff, Src1, Src2, Mask,
2139 VL, SEW, PolicyOp}),
2140 0);
2141 // vmxor.mm vd, vd, v0 is used to update active value.
2142 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2143 {Cmp, Mask, VL, MaskSEW}));
2144 return;
2145 }
2146 case Intrinsic::riscv_vsetvli:
2147 case Intrinsic::riscv_vsetvlimax:
2148 return selectVSETVLI(Node);
2149 case Intrinsic::riscv_sf_vsettnt:
2150 case Intrinsic::riscv_sf_vsettm:
2151 case Intrinsic::riscv_sf_vsettk:
2152 return selectXSfmmVSET(Node);
2153 }
2154 break;
2155 }
2156 case ISD::INTRINSIC_W_CHAIN: {
2157 unsigned IntNo = Node->getConstantOperandVal(1);
2158 switch (IntNo) {
2159 // By default we do not custom select any intrinsic.
2160 default:
2161 break;
2162 case Intrinsic::riscv_vlseg2:
2163 case Intrinsic::riscv_vlseg3:
2164 case Intrinsic::riscv_vlseg4:
2165 case Intrinsic::riscv_vlseg5:
2166 case Intrinsic::riscv_vlseg6:
2167 case Intrinsic::riscv_vlseg7:
2168 case Intrinsic::riscv_vlseg8: {
2169 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2170 /*IsStrided*/ false);
2171 return;
2172 }
2173 case Intrinsic::riscv_vlseg2_mask:
2174 case Intrinsic::riscv_vlseg3_mask:
2175 case Intrinsic::riscv_vlseg4_mask:
2176 case Intrinsic::riscv_vlseg5_mask:
2177 case Intrinsic::riscv_vlseg6_mask:
2178 case Intrinsic::riscv_vlseg7_mask:
2179 case Intrinsic::riscv_vlseg8_mask: {
2180 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2181 /*IsStrided*/ false);
2182 return;
2183 }
2184 case Intrinsic::riscv_vlsseg2:
2185 case Intrinsic::riscv_vlsseg3:
2186 case Intrinsic::riscv_vlsseg4:
2187 case Intrinsic::riscv_vlsseg5:
2188 case Intrinsic::riscv_vlsseg6:
2189 case Intrinsic::riscv_vlsseg7:
2190 case Intrinsic::riscv_vlsseg8: {
2191 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2192 /*IsStrided*/ true);
2193 return;
2194 }
2195 case Intrinsic::riscv_vlsseg2_mask:
2196 case Intrinsic::riscv_vlsseg3_mask:
2197 case Intrinsic::riscv_vlsseg4_mask:
2198 case Intrinsic::riscv_vlsseg5_mask:
2199 case Intrinsic::riscv_vlsseg6_mask:
2200 case Intrinsic::riscv_vlsseg7_mask:
2201 case Intrinsic::riscv_vlsseg8_mask: {
2202 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2203 /*IsStrided*/ true);
2204 return;
2205 }
2206 case Intrinsic::riscv_vloxseg2:
2207 case Intrinsic::riscv_vloxseg3:
2208 case Intrinsic::riscv_vloxseg4:
2209 case Intrinsic::riscv_vloxseg5:
2210 case Intrinsic::riscv_vloxseg6:
2211 case Intrinsic::riscv_vloxseg7:
2212 case Intrinsic::riscv_vloxseg8:
2213 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2214 /*IsOrdered*/ true);
2215 return;
2216 case Intrinsic::riscv_vluxseg2:
2217 case Intrinsic::riscv_vluxseg3:
2218 case Intrinsic::riscv_vluxseg4:
2219 case Intrinsic::riscv_vluxseg5:
2220 case Intrinsic::riscv_vluxseg6:
2221 case Intrinsic::riscv_vluxseg7:
2222 case Intrinsic::riscv_vluxseg8:
2223 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2224 /*IsOrdered*/ false);
2225 return;
2226 case Intrinsic::riscv_vloxseg2_mask:
2227 case Intrinsic::riscv_vloxseg3_mask:
2228 case Intrinsic::riscv_vloxseg4_mask:
2229 case Intrinsic::riscv_vloxseg5_mask:
2230 case Intrinsic::riscv_vloxseg6_mask:
2231 case Intrinsic::riscv_vloxseg7_mask:
2232 case Intrinsic::riscv_vloxseg8_mask:
2233 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2234 /*IsOrdered*/ true);
2235 return;
2236 case Intrinsic::riscv_vluxseg2_mask:
2237 case Intrinsic::riscv_vluxseg3_mask:
2238 case Intrinsic::riscv_vluxseg4_mask:
2239 case Intrinsic::riscv_vluxseg5_mask:
2240 case Intrinsic::riscv_vluxseg6_mask:
2241 case Intrinsic::riscv_vluxseg7_mask:
2242 case Intrinsic::riscv_vluxseg8_mask:
2243 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2244 /*IsOrdered*/ false);
2245 return;
2246 case Intrinsic::riscv_vlseg8ff:
2247 case Intrinsic::riscv_vlseg7ff:
2248 case Intrinsic::riscv_vlseg6ff:
2249 case Intrinsic::riscv_vlseg5ff:
2250 case Intrinsic::riscv_vlseg4ff:
2251 case Intrinsic::riscv_vlseg3ff:
2252 case Intrinsic::riscv_vlseg2ff: {
2253 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2254 return;
2255 }
2256 case Intrinsic::riscv_vlseg8ff_mask:
2257 case Intrinsic::riscv_vlseg7ff_mask:
2258 case Intrinsic::riscv_vlseg6ff_mask:
2259 case Intrinsic::riscv_vlseg5ff_mask:
2260 case Intrinsic::riscv_vlseg4ff_mask:
2261 case Intrinsic::riscv_vlseg3ff_mask:
2262 case Intrinsic::riscv_vlseg2ff_mask: {
2263 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2264 return;
2265 }
2266 case Intrinsic::riscv_vloxei:
2267 case Intrinsic::riscv_vloxei_mask:
2268 case Intrinsic::riscv_vluxei:
2269 case Intrinsic::riscv_vluxei_mask: {
2270 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2271 IntNo == Intrinsic::riscv_vluxei_mask;
2272 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2273 IntNo == Intrinsic::riscv_vloxei_mask;
2274
2275 MVT VT = Node->getSimpleValueType(0);
2276 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2277
2278 unsigned CurOp = 2;
2279 SmallVector<SDValue, 8> Operands;
2280 Operands.push_back(Node->getOperand(CurOp++));
2281
2282 MVT IndexVT;
2283 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2284 /*IsStridedOrIndexed*/ true, Operands,
2285 /*IsLoad=*/true, &IndexVT);
2286
2286
2287 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2288 "Element count mismatch");
2289
2290 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2291 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2292 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2293 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2294 reportFatalUsageError("The V extension does not support EEW=64 for "
2295 "index values when XLEN=32");
2296 }
2297 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2298 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2299 static_cast<unsigned>(IndexLMUL));
2300 MachineSDNode *Load =
2301 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2302
2303 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2304
2305 ReplaceNode(Node, Load);
2306 return;
2307 }
2308 case Intrinsic::riscv_vlm:
2309 case Intrinsic::riscv_vle:
2310 case Intrinsic::riscv_vle_mask:
2311 case Intrinsic::riscv_vlse:
2312 case Intrinsic::riscv_vlse_mask: {
2313 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2314 IntNo == Intrinsic::riscv_vlse_mask;
2315 bool IsStrided =
2316 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2317
2318 MVT VT = Node->getSimpleValueType(0);
2319 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2320
2321 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2322 // operand at the IR level. In pseudos, it has both a policy and a
2323 // passthru operand. The passthru operand is needed to track the
2324 // "tail undefined" state, and the policy is there just for
2325 // consistency - it will always be "don't care" for the
2326 // unmasked form.
2327 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2328 unsigned CurOp = 2;
2329 SmallVector<SDValue, 8> Operands;
2330 if (HasPassthruOperand)
2331 Operands.push_back(Node->getOperand(CurOp++));
2332 else {
2333 // We eagerly lower to implicit_def (instead of undef), as we
2334 // otherwise fail to select nodes such as: nxv1i1 = undef
2335 SDNode *Passthru =
2336 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2337 Operands.push_back(SDValue(Passthru, 0));
2338 }
2339 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2340 Operands, /*IsLoad=*/true);
2341
2342 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2343 const RISCV::VLEPseudo *P =
2344 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2345 static_cast<unsigned>(LMUL));
2346 MachineSDNode *Load =
2347 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2348
2349 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2350
2351 ReplaceNode(Node, Load);
2352 return;
2353 }
2354 case Intrinsic::riscv_vleff:
2355 case Intrinsic::riscv_vleff_mask: {
2356 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2357
2358 MVT VT = Node->getSimpleValueType(0);
2359 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2360
2361 unsigned CurOp = 2;
2362 SmallVector<SDValue, 7> Operands;
2363 Operands.push_back(Node->getOperand(CurOp++));
2364 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2365 /*IsStridedOrIndexed*/ false, Operands,
2366 /*IsLoad=*/true);
2367
2368 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2369 const RISCV::VLEPseudo *P =
2370 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2371 Log2SEW, static_cast<unsigned>(LMUL));
2372 MachineSDNode *Load = CurDAG->getMachineNode(
2373 P->Pseudo, DL, Node->getVTList(), Operands);
2374 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2375
2376 ReplaceNode(Node, Load);
2377 return;
2378 }
2379 case Intrinsic::riscv_nds_vln:
2380 case Intrinsic::riscv_nds_vln_mask:
2381 case Intrinsic::riscv_nds_vlnu:
2382 case Intrinsic::riscv_nds_vlnu_mask: {
2383 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2384 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2385 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2386 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2387
2388 MVT VT = Node->getSimpleValueType(0);
2389 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2390 unsigned CurOp = 2;
2391 SmallVector<SDValue, 8> Operands;
2392
2393 Operands.push_back(Node->getOperand(CurOp++));
2394 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2395 /*IsStridedOrIndexed=*/false, Operands,
2396 /*IsLoad=*/true);
2397
2398 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2399 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2400 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2401 MachineSDNode *Load =
2402 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2403
2404 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2405 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2406
2407 ReplaceNode(Node, Load);
2408 return;
2409 }
2410 }
2411 break;
2412 }
2413 case ISD::INTRINSIC_VOID: {
2414 unsigned IntNo = Node->getConstantOperandVal(1);
2415 switch (IntNo) {
2416 case Intrinsic::riscv_vsseg2:
2417 case Intrinsic::riscv_vsseg3:
2418 case Intrinsic::riscv_vsseg4:
2419 case Intrinsic::riscv_vsseg5:
2420 case Intrinsic::riscv_vsseg6:
2421 case Intrinsic::riscv_vsseg7:
2422 case Intrinsic::riscv_vsseg8: {
2423 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2424 /*IsStrided*/ false);
2425 return;
2426 }
2427 case Intrinsic::riscv_vsseg2_mask:
2428 case Intrinsic::riscv_vsseg3_mask:
2429 case Intrinsic::riscv_vsseg4_mask:
2430 case Intrinsic::riscv_vsseg5_mask:
2431 case Intrinsic::riscv_vsseg6_mask:
2432 case Intrinsic::riscv_vsseg7_mask:
2433 case Intrinsic::riscv_vsseg8_mask: {
2434 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2435 /*IsStrided*/ false);
2436 return;
2437 }
2438 case Intrinsic::riscv_vssseg2:
2439 case Intrinsic::riscv_vssseg3:
2440 case Intrinsic::riscv_vssseg4:
2441 case Intrinsic::riscv_vssseg5:
2442 case Intrinsic::riscv_vssseg6:
2443 case Intrinsic::riscv_vssseg7:
2444 case Intrinsic::riscv_vssseg8: {
2445 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2446 /*IsStrided*/ true);
2447 return;
2448 }
2449 case Intrinsic::riscv_vssseg2_mask:
2450 case Intrinsic::riscv_vssseg3_mask:
2451 case Intrinsic::riscv_vssseg4_mask:
2452 case Intrinsic::riscv_vssseg5_mask:
2453 case Intrinsic::riscv_vssseg6_mask:
2454 case Intrinsic::riscv_vssseg7_mask:
2455 case Intrinsic::riscv_vssseg8_mask: {
2456 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2457 /*IsStrided*/ true);
2458 return;
2459 }
2460 case Intrinsic::riscv_vsoxseg2:
2461 case Intrinsic::riscv_vsoxseg3:
2462 case Intrinsic::riscv_vsoxseg4:
2463 case Intrinsic::riscv_vsoxseg5:
2464 case Intrinsic::riscv_vsoxseg6:
2465 case Intrinsic::riscv_vsoxseg7:
2466 case Intrinsic::riscv_vsoxseg8:
2467 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2468 /*IsOrdered*/ true);
2469 return;
2470 case Intrinsic::riscv_vsuxseg2:
2471 case Intrinsic::riscv_vsuxseg3:
2472 case Intrinsic::riscv_vsuxseg4:
2473 case Intrinsic::riscv_vsuxseg5:
2474 case Intrinsic::riscv_vsuxseg6:
2475 case Intrinsic::riscv_vsuxseg7:
2476 case Intrinsic::riscv_vsuxseg8:
2477 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2478 /*IsOrdered*/ false);
2479 return;
2480 case Intrinsic::riscv_vsoxseg2_mask:
2481 case Intrinsic::riscv_vsoxseg3_mask:
2482 case Intrinsic::riscv_vsoxseg4_mask:
2483 case Intrinsic::riscv_vsoxseg5_mask:
2484 case Intrinsic::riscv_vsoxseg6_mask:
2485 case Intrinsic::riscv_vsoxseg7_mask:
2486 case Intrinsic::riscv_vsoxseg8_mask:
2487 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2488 /*IsOrdered*/ true);
2489 return;
2490 case Intrinsic::riscv_vsuxseg2_mask:
2491 case Intrinsic::riscv_vsuxseg3_mask:
2492 case Intrinsic::riscv_vsuxseg4_mask:
2493 case Intrinsic::riscv_vsuxseg5_mask:
2494 case Intrinsic::riscv_vsuxseg6_mask:
2495 case Intrinsic::riscv_vsuxseg7_mask:
2496 case Intrinsic::riscv_vsuxseg8_mask:
2497 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2498 /*IsOrdered*/ false);
2499 return;
2500 case Intrinsic::riscv_vsoxei:
2501 case Intrinsic::riscv_vsoxei_mask:
2502 case Intrinsic::riscv_vsuxei:
2503 case Intrinsic::riscv_vsuxei_mask: {
2504 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2505 IntNo == Intrinsic::riscv_vsuxei_mask;
2506 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2507 IntNo == Intrinsic::riscv_vsoxei_mask;
2508
2509 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2510 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2511
2512 unsigned CurOp = 2;
2513 SmallVector<SDValue, 8> Operands;
2514 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2515
2516 MVT IndexVT;
2517 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2518 /*IsStridedOrIndexed*/ true, Operands,
2519 /*IsLoad=*/false, &IndexVT);
2520
2521 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2522 "Element count mismatch");
2523
2524 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2525 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2526 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2527 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2528 reportFatalUsageError("The V extension does not support EEW=64 for "
2529 "index values when XLEN=32");
2530 }
2531 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2532 IsMasked, IsOrdered, IndexLog2EEW,
2533 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2534 MachineSDNode *Store =
2535 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2536
2537 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2538
2539 ReplaceNode(Node, Store);
2540 return;
2541 }
2542 case Intrinsic::riscv_vsm:
2543 case Intrinsic::riscv_vse:
2544 case Intrinsic::riscv_vse_mask:
2545 case Intrinsic::riscv_vsse:
2546 case Intrinsic::riscv_vsse_mask: {
2547 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2548 IntNo == Intrinsic::riscv_vsse_mask;
2549 bool IsStrided =
2550 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2551
2552 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2553 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2554
2555 unsigned CurOp = 2;
2556 SmallVector<SDValue, 8> Operands;
2557 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2558
2559 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2560 Operands);
2561
2562 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2563 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2564 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2565 MachineSDNode *Store =
2566 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2567 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2568
2569 ReplaceNode(Node, Store);
2570 return;
2571 }
2572 case Intrinsic::riscv_sf_vc_x_se:
2573 case Intrinsic::riscv_sf_vc_i_se:
2574 selectSF_VC_X_SE(Node);
2575 return;
2576 case Intrinsic::riscv_sf_vlte8:
2577 case Intrinsic::riscv_sf_vlte16:
2578 case Intrinsic::riscv_sf_vlte32:
2579 case Intrinsic::riscv_sf_vlte64: {
2580 unsigned Log2SEW;
2581 unsigned PseudoInst;
2582 switch (IntNo) {
2583 case Intrinsic::riscv_sf_vlte8:
2584 PseudoInst = RISCV::PseudoSF_VLTE8;
2585 Log2SEW = 3;
2586 break;
2587 case Intrinsic::riscv_sf_vlte16:
2588 PseudoInst = RISCV::PseudoSF_VLTE16;
2589 Log2SEW = 4;
2590 break;
2591 case Intrinsic::riscv_sf_vlte32:
2592 PseudoInst = RISCV::PseudoSF_VLTE32;
2593 Log2SEW = 5;
2594 break;
2595 case Intrinsic::riscv_sf_vlte64:
2596 PseudoInst = RISCV::PseudoSF_VLTE64;
2597 Log2SEW = 6;
2598 break;
2599 }
2600
2601 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2602 SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT);
2603 SDValue Operands[] = {Node->getOperand(2),
2604 Node->getOperand(3),
2605 Node->getOperand(4),
2606 SEWOp,
2607 TWidenOp,
2608 Node->getOperand(0)};
2609
2610 MachineSDNode *TileLoad =
2611 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2612 CurDAG->setNodeMemRefs(TileLoad,
2613 {cast<MemSDNode>(Node)->getMemOperand()});
2614
2615 ReplaceNode(Node, TileLoad);
2616 return;
2617 }
2618 case Intrinsic::riscv_sf_mm_s_s:
2619 case Intrinsic::riscv_sf_mm_s_u:
2620 case Intrinsic::riscv_sf_mm_u_s:
2621 case Intrinsic::riscv_sf_mm_u_u:
2622 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2623 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2624 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2625 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2626 case Intrinsic::riscv_sf_mm_f_f: {
2627 bool HasFRM = false;
2628 unsigned PseudoInst;
2629 switch (IntNo) {
2630 case Intrinsic::riscv_sf_mm_s_s:
2631 PseudoInst = RISCV::PseudoSF_MM_S_S;
2632 break;
2633 case Intrinsic::riscv_sf_mm_s_u:
2634 PseudoInst = RISCV::PseudoSF_MM_S_U;
2635 break;
2636 case Intrinsic::riscv_sf_mm_u_s:
2637 PseudoInst = RISCV::PseudoSF_MM_U_S;
2638 break;
2639 case Intrinsic::riscv_sf_mm_u_u:
2640 PseudoInst = RISCV::PseudoSF_MM_U_U;
2641 break;
2642 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2643 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2644 HasFRM = true;
2645 break;
2646 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2647 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2648 HasFRM = true;
2649 break;
2650 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2651 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2652 HasFRM = true;
2653 break;
2654 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2655 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2656 HasFRM = true;
2657 break;
2658 case Intrinsic::riscv_sf_mm_f_f:
2659 if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16)
2660 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2661 else
2662 PseudoInst = RISCV::PseudoSF_MM_F_F;
2663 HasFRM = true;
2664 break;
2665 }
2666 uint64_t TileNum = Node->getConstantOperandVal(2);
2667 SDValue Op1 = Node->getOperand(3);
2668 SDValue Op2 = Node->getOperand(4);
2669 MVT VT = Op1->getSimpleValueType(0);
2670 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2671 SDValue TmOp = Node->getOperand(5);
2672 SDValue TnOp = Node->getOperand(6);
2673 SDValue TkOp = Node->getOperand(7);
2674 SDValue TWidenOp = Node->getOperand(8);
2675 SDValue Chain = Node->getOperand(0);
2676
2677 // sf.mm.f.f with sew=32, twiden=2 is invalid
2678 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2679 TWidenOp->getAsZExtVal() == 2)
2680 reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)");
2681
2682 SmallVector<SDValue, 10> Operands(
2683 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2});
2684 if (HasFRM)
2685 Operands.push_back(
2686 CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT));
2687 Operands.append({TmOp, TnOp, TkOp,
2688 CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp,
2689 Chain});
2690
2691 auto *NewNode =
2692 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2693
2694 ReplaceNode(Node, NewNode);
2695 return;
2696 }
2697 case Intrinsic::riscv_sf_vtzero_t: {
2698 uint64_t TileNum = Node->getConstantOperandVal(2);
2699 SDValue Tm = Node->getOperand(3);
2700 SDValue Tn = Node->getOperand(4);
2701 SDValue Log2SEW = Node->getOperand(5);
2702 SDValue TWiden = Node->getOperand(6);
2703 SDValue Chain = Node->getOperand(0);
2704 auto *NewNode = CurDAG->getMachineNode(
2705 RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(),
2706 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW,
2707 TWiden, Chain});
2708
2709 ReplaceNode(Node, NewNode);
2710 return;
2711 }
2712 }
2713 break;
2714 }
2715 case ISD::BITCAST: {
2716 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2717 // Just drop bitcasts between vectors if both are fixed or both are
2718 // scalable.
2719 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2720 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2721 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2722 CurDAG->RemoveDeadNode(Node);
2723 return;
2724 }
2725 if (Subtarget->enablePExtCodeGen()) {
2726 bool Is32BitCast =
2727 (VT == MVT::i32 && (SrcVT == MVT::v4i8 || SrcVT == MVT::v2i16)) ||
2728 (SrcVT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
2729 bool Is64BitCast =
2730 (VT == MVT::i64 && (SrcVT == MVT::v8i8 || SrcVT == MVT::v4i16 ||
2731 SrcVT == MVT::v2i32)) ||
2732 (SrcVT == MVT::i64 &&
2733 (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
2734 if (Is32BitCast || Is64BitCast) {
2735 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2736 CurDAG->RemoveDeadNode(Node);
2737 return;
2738 }
2739 }
2740 break;
2741 }
2743 if (Subtarget->enablePExtCodeGen()) {
2744 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2745 if ((VT == MVT::v2i32 && SrcVT == MVT::i64) ||
2746 (VT == MVT::v4i8 && SrcVT == MVT::i32)) {
2747 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2748 CurDAG->RemoveDeadNode(Node);
2749 return;
2750 }
2751 }
2752 break;
2753 case ISD::INSERT_SUBVECTOR:
2754 case RISCVISD::TUPLE_INSERT: {
2755 SDValue V = Node->getOperand(0);
2756 SDValue SubV = Node->getOperand(1);
2757 SDLoc DL(SubV);
2758 auto Idx = Node->getConstantOperandVal(2);
2759 MVT SubVecVT = SubV.getSimpleValueType();
2760
2761 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2762 MVT SubVecContainerVT = SubVecVT;
2763 // Establish the correct scalable-vector types for any fixed-length type.
2764 if (SubVecVT.isFixedLengthVector()) {
2765 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2766 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2767 [[maybe_unused]] bool ExactlyVecRegSized =
2768 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2769 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2770 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2771 .getKnownMinValue()));
2772 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2773 }
2774 MVT ContainerVT = VT;
2775 if (VT.isFixedLengthVector())
2776 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2777
2778 const auto *TRI = Subtarget->getRegisterInfo();
2779 unsigned SubRegIdx;
2780 std::tie(SubRegIdx, Idx) =
2781 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2782 ContainerVT, SubVecContainerVT, Idx, TRI);
2783
2784 // If the Idx hasn't been completely eliminated then this is a subvector
2785 // insert which doesn't naturally align to a vector register. These must
2786 // be handled using instructions to manipulate the vector registers.
2787 if (Idx != 0)
2788 break;
2789
2790 RISCVVType::VLMUL SubVecLMUL =
2791 RISCVTargetLowering::getLMUL(SubVecContainerVT);
2792 [[maybe_unused]] bool IsSubVecPartReg =
2793 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
2794 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
2795 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
2796 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2797 V.isUndef()) &&
2798 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2799 "the subvector is smaller than a full-sized register");
2800
2801 // If we haven't set a SubRegIdx, then we must be going between
2802 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2803 if (SubRegIdx == RISCV::NoSubRegister) {
2804 unsigned InRegClassID =
2805 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2806 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2807 InRegClassID &&
2808 "Unexpected subvector extraction");
2809 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2810 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2811 DL, VT, SubV, RC);
2812 ReplaceNode(Node, NewNode);
2813 return;
2814 }
2815
2816 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2817 ReplaceNode(Node, Insert.getNode());
2818 return;
2819 }
2820 case ISD::EXTRACT_SUBVECTOR:
2821 case RISCVISD::TUPLE_EXTRACT: {
2822 SDValue V = Node->getOperand(0);
2823 auto Idx = Node->getConstantOperandVal(1);
2824 MVT InVT = V.getSimpleValueType();
2825 SDLoc DL(V);
2826
2827 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2828 MVT SubVecContainerVT = VT;
2829 // Establish the correct scalable-vector types for any fixed-length type.
2830 if (VT.isFixedLengthVector()) {
2831 assert(Idx == 0);
2832 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2833 }
2834 if (InVT.isFixedLengthVector())
2835 InVT = TLI.getContainerForFixedLengthVector(InVT);
2836
2837 const auto *TRI = Subtarget->getRegisterInfo();
2838 unsigned SubRegIdx;
2839 std::tie(SubRegIdx, Idx) =
2840 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2841 InVT, SubVecContainerVT, Idx, TRI);
2842
2843 // If the Idx hasn't been completely eliminated then this is a subvector
2844 // extract which doesn't naturally align to a vector register. These must
2845 // be handled using instructions to manipulate the vector registers.
2846 if (Idx != 0)
2847 break;
2848
2849 // If we haven't set a SubRegIdx, then we must be going between
2850 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2851 if (SubRegIdx == RISCV::NoSubRegister) {
2852 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2853 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2854 InRegClassID &&
2855 "Unexpected subvector extraction");
2856 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2857 SDNode *NewNode =
2858 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2859 ReplaceNode(Node, NewNode);
2860 return;
2861 }
2862
2863 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2864 ReplaceNode(Node, Extract.getNode());
2865 return;
2866 }
2867 case RISCVISD::VMV_S_X_VL:
2868 case RISCVISD::VFMV_S_F_VL:
2869 case RISCVISD::VMV_V_X_VL:
2870 case RISCVISD::VFMV_V_F_VL: {
2871 // Try to match splat of a scalar load to a strided load with stride of x0.
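// Added note: a stride of x0 makes every element read the same address, so a
// splat of a loaded scalar can become roughly "vlse32.v vd, (a0), zero" and
// the scalar load disappears. The strided form is only used when the core
// advertises optimized zero-stride loads (checked below); with VL=1 a regular
// unit-stride load is used instead.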
2872 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2873 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2874 if (!Node->getOperand(0).isUndef())
2875 break;
2876 SDValue Src = Node->getOperand(1);
2877 auto *Ld = dyn_cast<LoadSDNode>(Src);
2878 // Can't fold an indexed load: its second output (the address update) is
2879 // used, so the load node could not be removed after folding.
2880 if (!Ld || Ld->isIndexed())
2881 break;
2882 EVT MemVT = Ld->getMemoryVT();
2883 // The memory VT should be the same size as the element type.
2884 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2885 break;
2886 if (!IsProfitableToFold(Src, Node, Node) ||
2887 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2888 break;
2889
2890 SDValue VL;
2891 if (IsScalarMove) {
2892 // We could deal with more VL if we update the VSETVLI insert pass to
2893 // avoid introducing more VSETVLI.
2894 if (!isOneConstant(Node->getOperand(2)))
2895 break;
2896 selectVLOp(Node->getOperand(2), VL);
2897 } else
2898 selectVLOp(Node->getOperand(2), VL);
2899
2900 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2901 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2902
2903 // If VL=1, then we don't need to do a strided load and can just do a
2904 // regular load.
2905 bool IsStrided = !isOneConstant(VL);
2906
2907 // Only do a strided load if we have optimized zero-stride vector load.
2908 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2909 break;
2910
2911 SmallVector<SDValue> Operands = {
2912 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2913 Ld->getBasePtr()};
2914 if (IsStrided)
2915 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2916 uint64_t Policy = RISCVVType::MASK_AGNOSTIC | RISCVVType::TAIL_AGNOSTIC;
2917 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2918 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2919
2920 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2921 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2922 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2923 Log2SEW, static_cast<unsigned>(LMUL));
2924 MachineSDNode *Load =
2925 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2926 // Update the chain.
2927 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2928 // Record the mem-refs
2929 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2930 // Replace the splat with the vlse.
2931 ReplaceNode(Node, Load);
2932 return;
2933 }
2934 case ISD::PREFETCH:
2935 unsigned Locality = Node->getConstantOperandVal(3);
2936 if (Locality > 2)
2937 break;
2938
2939 auto *LoadStoreMem = cast<MemSDNode>(Node);
2940 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2941 MMO->setFlags(MachineMemOperand::MONonTemporal);
2942
2943 int NontemporalLevel = 0;
2944 switch (Locality) {
2945 case 0:
2946 NontemporalLevel = 3; // NTL.ALL
2947 break;
2948 case 1:
2949 NontemporalLevel = 1; // NTL.PALL
2950 break;
2951 case 2:
2952 NontemporalLevel = 0; // NTL.P1
2953 break;
2954 default:
2955 llvm_unreachable("unexpected locality value.");
2956 }
2957
2958 if (NontemporalLevel & 0b1)
2959 MMO->setFlags(MONontemporalBit0);
2960 if (NontemporalLevel & 0b10)
2961 MMO->setFlags(MONontemporalBit1);
2962 break;
2963 }
2964
2965 // Select the default instruction.
2966 SelectCode(Node);
2967}
2968
2969 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2970 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2971 std::vector<SDValue> &OutOps) {
2972 // Always produce a register and immediate operand, as expected by
2973 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2974 switch (ConstraintID) {
2975 case InlineAsm::ConstraintCode::o:
2976 case InlineAsm::ConstraintCode::m: {
2977 SDValue Op0, Op1;
2978 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2979 assert(Found && "SelectAddrRegImm should always succeed");
2980 OutOps.push_back(Op0);
2981 OutOps.push_back(Op1);
2982 return false;
2983 }
2984 case InlineAsm::ConstraintCode::A:
2985 OutOps.push_back(Op);
2986 OutOps.push_back(
2987 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2988 return false;
2989 default:
2990 report_fatal_error("Unexpected asm memory constraint " +
2991 InlineAsm::getMemConstraintName(ConstraintID));
2992 }
2993
2994 return true;
2995}
2996
2997 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2998 SDValue &Offset) {
2999 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3000 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
3001 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
3002 return true;
3003 }
3004
3005 return false;
3006}
3007
3008// Fold constant addresses.
3009static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
3010 const MVT VT, const RISCVSubtarget *Subtarget,
3011 SDValue Addr, SDValue &Base, SDValue &Offset,
3012 bool IsPrefetch = false) {
3013 if (!isa<ConstantSDNode>(Addr))
3014 return false;
3015
3016 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
3017
3018 // If the constant is a simm12, we can fold the whole constant and use X0 as
3019 // the base. If the constant can be materialized with LUI+simm12, use LUI as
3020 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
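// Illustrative example (added commentary): for Addr = 0x12345FFF, Lo12 is
// sign-extended to -1 and Hi becomes 0x12346000, so the base is materialized
// as "LUI 0x12346" and the remaining -1 is folded into the memory offset.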
3021 int64_t Lo12 = SignExtend64<12>(CVal);
3022 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
3023 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
3024 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3025 return false;
3026 if (Hi) {
3027 int64_t Hi20 = (Hi >> 12) & 0xfffff;
3028 Base = SDValue(
3029 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
3030 CurDAG->getTargetConstant(Hi20, DL, VT)),
3031 0);
3032 } else {
3033 Base = CurDAG->getRegister(RISCV::X0, VT);
3034 }
3035 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3036 return true;
3037 }
3038
3039 // Ask how constant materialization would handle this constant.
3040 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
3041
3042 // If the last instruction would be an ADDI, we can fold its immediate and
3043 // emit the rest of the sequence as the base.
3044 if (Seq.back().getOpcode() != RISCV::ADDI)
3045 return false;
3046 Lo12 = Seq.back().getImm();
3047 if (IsPrefetch && (Lo12 & 0b11111) != 0)
3048 return false;
3049
3050 // Drop the last instruction.
3051 Seq.pop_back();
3052 assert(!Seq.empty() && "Expected more instructions in sequence");
3053
3054 Base = selectImmSeq(CurDAG, DL, VT, Seq);
3055 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
3056 return true;
3057}
3058
3059// Is this ADD instruction only used as the base pointer of scalar loads and
3060// stores?
3061 static bool isWorthFoldingAdd(SDValue Add) {
3062 for (auto *User : Add->users()) {
3063 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
3064 User->getOpcode() != RISCVISD::LD_RV32 &&
3065 User->getOpcode() != RISCVISD::SD_RV32 &&
3066 User->getOpcode() != ISD::ATOMIC_LOAD &&
3067 User->getOpcode() != ISD::ATOMIC_STORE)
3068 return false;
3069 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3070 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
3071 VT != MVT::f64)
3072 return false;
3073 // Don't allow stores of the value. It must be used as the address.
3074 if (User->getOpcode() == ISD::STORE &&
3075 cast<StoreSDNode>(User)->getValue() == Add)
3076 return false;
3077 if (User->getOpcode() == ISD::ATOMIC_STORE &&
3078 cast<AtomicSDNode>(User)->getVal() == Add)
3079 return false;
3080 if (User->getOpcode() == RISCVISD::SD_RV32 &&
3081 (User->getOperand(0) == Add || User->getOperand(1) == Add))
3082 return false;
3083 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
3084 return false;
3085 }
3086
3087 return true;
3088}
3089
3090 static bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
3091 switch (User->getOpcode()) {
3092 default:
3093 return false;
3094 case ISD::LOAD:
3095 case RISCVISD::LD_RV32:
3096 case ISD::ATOMIC_LOAD:
3097 break;
3098 case ISD::STORE:
3099 // Don't allow stores of Add. It must only be used as the address.
3100 if (cast<StoreSDNode>(User)->getValue() == Add)
3101 return false;
3102 break;
3103 case RISCVISD::SD_RV32:
3104 // Don't allow stores of Add. It must only be used as the address.
3105 if (User->getOperand(0) == Add || User->getOperand(1) == Add)
3106 return false;
3107 break;
3108 case ISD::ATOMIC_STORE:
3109 // Don't allow stores of Add. It must only be used as the address.
3110 if (cast<AtomicSDNode>(User)->getVal() == Add)
3111 return false;
3112 break;
3113 }
3114
3115 return true;
3116}
3117
3118// To prevent SelectAddrRegImm from folding offsets that conflict with the
3119// fusion of PseudoMovAddr, check if the offset of every use of a given address
3120// is within the alignment.
3121 bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr,
3122 Align Alignment) {
3123 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3124 for (auto *User : Addr->users()) {
3125 // If the user is a load or store, then the offset is 0 which is always
3126 // within alignment.
3127 if (isRegImmLoadOrStore(User, Addr))
3128 continue;
3129
3130 if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
3131 int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3132 if (!isInt<12>(CVal) || Alignment <= CVal)
3133 return false;
3134
3135 // Make sure all uses are foldable load/stores.
3136 for (auto *AddUser : User->users())
3137 if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
3138 return false;
3139
3140 continue;
3141 }
3142
3143 return false;
3144 }
3145
3146 return true;
3147}
3148
3149 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
3150 SDValue &Offset) {
3151 if (SelectAddrFrameIndex(Addr, Base, Offset))
3152 return true;
3153
3154 SDLoc DL(Addr);
3155 MVT VT = Addr.getSimpleValueType();
3156
3157 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3158 bool CanFold = true;
3159 // Unconditionally fold if operand 1 is not a global address (e.g. an
3160 // external symbol).
3161 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
3162 const DataLayout &DL = CurDAG->getDataLayout();
3163 Align Alignment = commonAlignment(
3164 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3165 if (!areOffsetsWithinAlignment(Addr, Alignment))
3166 CanFold = false;
3167 }
3168 if (CanFold) {
3169 Base = Addr.getOperand(0);
3170 Offset = Addr.getOperand(1);
3171 return true;
3172 }
3173 }
3174
3175 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3176 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3177 if (isInt<12>(CVal)) {
3178 Base = Addr.getOperand(0);
3179 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3180 SDValue LoOperand = Base.getOperand(1);
3181 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
3182 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3183 // (its low part, really), then we can rely on the alignment of that
3184 // variable to provide a margin of safety before the low part can overflow
3185 // the 12 bits of the load/store offset. Check if CVal falls within
3186 // that margin; if so (low part + CVal) can't overflow.
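// Illustrative example (added commentary): if the global is 16-byte aligned,
// its low part is a multiple of 16 and at most 2032, so adding any CVal in
// [0, 15] still fits the signed 12-bit offset field.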
3187 const DataLayout &DL = CurDAG->getDataLayout();
3188 Align Alignment = commonAlignment(
3189 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3190 if ((CVal == 0 || Alignment > CVal) &&
3191 areOffsetsWithinAlignment(Base, Alignment)) {
3192 int64_t CombinedOffset = CVal + GA->getOffset();
3193 Base = Base.getOperand(0);
3194 Offset = CurDAG->getTargetGlobalAddress(
3195 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
3196 CombinedOffset, GA->getTargetFlags());
3197 return true;
3198 }
3199 }
3200 }
3201
3202 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3203 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3204 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3205 return true;
3206 }
3207 }
3208
3209 // Handle ADD with large immediates.
3210 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3211 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3212 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3213
3214 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3215 // an ADDI for part of the offset and fold the rest into the load/store.
3216 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
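// Illustrative example (added commentary): for an offset of 3000 we emit
// "ADDI tmp, base, 2047" and fold the remaining 953 into the load/store;
// both values are legal simm12 immediates.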
3217 if (CVal >= -4096 && CVal <= 4094) {
3218 int64_t Adj = CVal < 0 ? -2048 : 2047;
3219 Base = SDValue(
3220 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3221 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3222 0);
3223 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3224 return true;
3225 }
3226
3227 // For larger immediates, we might be able to save one instruction from
3228 // constant materialization by folding the Lo12 bits of the immediate into
3229 // the address. We should only do this if the ADD is only used by loads and
3230 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3231 // separately with the full materialized immediate creating extra
3232 // instructions.
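// Illustrative example (added commentary): base + 0x12345 normally needs
// LUI + ADDI + ADD before the access; folding the low 12 bits gives
// "LUI tmp, 0x12; ADD tmp, base, tmp; LW rd, 0x345(tmp)", saving the ADDI.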
3233 if (isWorthFoldingAdd(Addr) &&
3234 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3235 Offset, /*IsPrefetch=*/false)) {
3236 // Insert an ADD instruction with the materialized Hi52 bits.
3237 Base = SDValue(
3238 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3239 0);
3240 return true;
3241 }
3242 }
3243
3244 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3245 /*IsPrefetch=*/false))
3246 return true;
3247
3248 Base = Addr;
3249 Offset = CurDAG->getTargetConstant(0, DL, VT);
3250 return true;
3251}
3252
3253/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3255 SDValue &Offset) {
3256 if (SelectAddrFrameIndex(Addr, Base, Offset))
3257 return true;
3258
3259 SDLoc DL(Addr);
3260 MVT VT = Addr.getSimpleValueType();
3261
3262 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3263 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3264 if (isUInt<9>(CVal)) {
3265 Base = Addr.getOperand(0);
3266
3267 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3268 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3269 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3270 return true;
3271 }
3272 }
3273
3274 Base = Addr;
3275 Offset = CurDAG->getTargetConstant(0, DL, VT);
3276 return true;
3277}
3278
3279/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3280/// Offset should be all zeros.
3281 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
3282 SDValue &Offset) {
3283 if (SelectAddrFrameIndex(Addr, Base, Offset))
3284 return true;
3285
3286 SDLoc DL(Addr);
3287 MVT VT = Addr.getSimpleValueType();
3288
3289 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3290 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3291 if (isInt<12>(CVal)) {
3292 Base = Addr.getOperand(0);
3293
3294 // Early-out if not a valid offset.
3295 if ((CVal & 0b11111) != 0) {
3296 Base = Addr;
3297 Offset = CurDAG->getTargetConstant(0, DL, VT);
3298 return true;
3299 }
3300
3301 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3302 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3303 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3304 return true;
3305 }
3306 }
3307
3308 // Handle ADD with large immediates.
3309 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3310 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3311 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3312
3313 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3314 // one instruction by folding an adjustment (-2048 or 2016) into the address.
3315 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3316 int64_t Adj = CVal < 0 ? -2048 : 2016;
3317 int64_t AdjustedOffset = CVal - Adj;
3318 Base =
3319 SDValue(CurDAG->getMachineNode(
3320 RISCV::ADDI, DL, VT, Addr.getOperand(0),
3321 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3322 0);
3323 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3324 return true;
3325 }
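    // Illustrative split for the prefetch case above: with CVal == 4000 the
    // ADDI carries 4000 - 2016 = 1984 and the folded offset is 2016, which
    // keeps the low 5 bits of the offset zero as required.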
3326
3327 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3328 Offset, /*IsPrefetch=*/true)) {
3329 // Insert an ADD instruction with the materialized Hi52 bits.
3330 Base = SDValue(
3331 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3332 0);
3333 return true;
3334 }
3335 }
3336
3337 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3338 /*IsPrefetch=*/true))
3339 return true;
3340
3341 Base = Addr;
3342 Offset = CurDAG->getTargetConstant(0, DL, VT);
3343 return true;
3344}
3345
3346/// Return true if this is a load/store that we have a RegRegScale instruction for.
3347static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
3348 const RISCVSubtarget &Subtarget) {
3349 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3350 return false;
3351 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3352 if (!(VT.isScalarInteger() &&
3353 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3354 !((VT == MVT::f32 || VT == MVT::f64) &&
3355 Subtarget.hasVendorXTHeadFMemIdx()))
3356 return false;
3357 // Don't allow stores of the value. It must be used as the address.
3358 if (User->getOpcode() == ISD::STORE &&
3359 cast<StoreSDNode>(User)->getValue() == Add)
3360 return false;
3361
3362 return true;
3363}
3364
3365/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3366/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3367/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3368/// single addi and we don't have a SHXADD instruction we could use.
3369/// FIXME: May still need to check how many and what kind of users the SHL has.
3370static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
3371 SDValue Add,
3372 SDValue Shift = SDValue()) {
3373 bool FoundADDI = false;
3374 for (auto *User : Add->users()) {
3375 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3376 continue;
3377
3378 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3379 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3380 !isa<ConstantSDNode>(User->getOperand(1)) ||
3381 !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3382 return false;
3383
3384 FoundADDI = true;
3385
3386 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3387 assert(Shift.getOpcode() == ISD::SHL);
3388 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3389 if (Subtarget.hasShlAdd(ShiftAmt))
3390 return false;
3391
3392 // All users of the ADDI should be load/store.
3393 for (auto *ADDIUser : User->users())
3394 if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3395 return false;
3396 }
3397
3398 return true;
3399}
3400
3401bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
3402 unsigned MaxShiftAmount,
3403 SDValue &Base, SDValue &Index,
3404 SDValue &Scale) {
3405 if (Addr.getOpcode() != ISD::ADD)
3406 return false;
3407 SDValue LHS = Addr.getOperand(0);
3408 SDValue RHS = Addr.getOperand(1);
3409
3410 EVT VT = Addr.getSimpleValueType();
3411 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3412 SDValue &Shift) {
3413 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3414 return false;
3415
3416 // Only match shifts by a value in range [0, MaxShiftAmount].
3417 unsigned ShiftAmt = N.getConstantOperandVal(1);
3418 if (ShiftAmt > MaxShiftAmount)
3419 return false;
3420
3421 Index = N.getOperand(0);
3422 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3423 return true;
3424 };
3425
3426 if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3427 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3428 if (LHS.getOpcode() == ISD::ADD &&
3429 !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3430 isInt<12>(C1->getSExtValue())) {
3431 if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3432 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3433 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3434 SDLoc(Addr), VT);
3435 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3436 LHS.getOperand(0), C1Val),
3437 0);
3438 return true;
3439 }
3440
3441 // Add is commutative so we need to check both operands.
3442 if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3443 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3444 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3445 SDLoc(Addr), VT);
3446 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3447 LHS.getOperand(1), C1Val),
3448 0);
3449 return true;
3450 }
3451 }
3452
3453 // Don't match add with constants.
3454 // FIXME: Is this profitable for large constants that have 0s in the lower
3455 // 12 bits that we can materialize with LUI?
3456 return false;
3457 }
3458
3459 // Try to match a shift on the RHS.
3460 if (SelectShl(RHS, Index, Scale)) {
3461 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3462 return false;
3463 Base = LHS;
3464 return true;
3465 }
3466
3467 // Try to match a shift on the LHS.
3468 if (SelectShl(LHS, Index, Scale)) {
3469 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3470 return false;
3471 Base = RHS;
3472 return true;
3473 }
3474
3475 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3476 return false;
3477
3478 Base = LHS;
3479 Index = RHS;
3480 Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3481 return true;
3482}
3483
3484bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
3485 unsigned MaxShiftAmount,
3486 unsigned Bits, SDValue &Base,
3487 SDValue &Index,
3488 SDValue &Scale) {
3489 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3490 return false;
3491
3492 if (Index.getOpcode() == ISD::AND) {
3493 auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
3494 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3495 Index = Index.getOperand(0);
3496 return true;
3497 }
3498 }
3499
3500 return false;
3501}
3502
3503bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3504 SDValue &Offset) {
3505 if (Addr.getOpcode() != ISD::ADD)
3506 return false;
3507
3508 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3509 return false;
3510
3511 Base = Addr.getOperand(0);
3512 Offset = Addr.getOperand(1);
3513 return true;
3514}
3515
3516bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
3517 SDValue &ShAmt) {
3518 ShAmt = N;
3519
3520 // Peek through zext.
3521 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3522 ShAmt = ShAmt.getOperand(0);
3523
3524 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3525 // amount. If there is an AND on the shift amount, we can bypass it if it
3526 // doesn't affect any of those bits.
3527 if (ShAmt.getOpcode() == ISD::AND &&
3528 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3529 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
3530
3531 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3532 // mask that covers the bits needed to represent all shift amounts.
3533 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3534 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3535
3536 if (ShMask.isSubsetOf(AndMask)) {
3537 ShAmt = ShAmt.getOperand(0);
3538 } else {
3539 // SimplifyDemandedBits may have optimized the mask so try restoring any
3540 // bits that are known zero.
3541 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
3542 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
3543 return true;
3544 ShAmt = ShAmt.getOperand(0);
3545 }
3546 }
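  // For example, with ShiftWidth == 64 an amount of (and X, 63) covers all six
  // bits SLL/SRL/SRA read, so the AND is dropped and X is used directly;
  // (and X, 31) cannot be bypassed unless bit 5 of X is known to be zero.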
3547
3548 if (ShAmt.getOpcode() == ISD::ADD &&
3549 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3550 uint64_t Imm = ShAmt.getConstantOperandVal(1);
3551 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3552 // to avoid the ADD.
3553 if (Imm != 0 && Imm % ShiftWidth == 0) {
3554 ShAmt = ShAmt.getOperand(0);
3555 return true;
3556 }
3557 } else if (ShAmt.getOpcode() == ISD::SUB &&
3558 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
3559 uint64_t Imm = ShAmt.getConstantOperandVal(0);
3560 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3561 // generate a NEG instead of a SUB of a constant.
3562 if (Imm != 0 && Imm % ShiftWidth == 0) {
3563 SDLoc DL(ShAmt);
3564 EVT VT = ShAmt.getValueType();
3565 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
3566 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3567 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
3568 ShAmt.getOperand(1));
3569 ShAmt = SDValue(Neg, 0);
3570 return true;
3571 }
3572 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3573 // to generate a NOT instead of a SUB of a constant.
3574 if (Imm % ShiftWidth == ShiftWidth - 1) {
3575 SDLoc DL(ShAmt);
3576 EVT VT = ShAmt.getValueType();
3577 MachineSDNode *Not = CurDAG->getMachineNode(
3578 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
3579 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
3580 ShAmt = SDValue(Not, 0);
3581 return true;
3582 }
3583 }
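  // For example, with ShiftWidth == 64 an amount of (add X, 64) is replaced by
  // X, (sub 64, X) becomes a single negw, and (sub 63, X) becomes
  // (xori X, -1), since the shift only reads the low six bits of the amount.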
3584
3585 return true;
3586}
3587
3588/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3589/// check for equality with 0. This function emits instructions that convert the
3590/// seteq/setne into something that can be compared with 0.
3591/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3592/// ISD::SETNE).
3593bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
3594 SDValue &Val) {
3595 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3596 "Unexpected condition code!");
3597
3598 // We're looking for a setcc.
3599 if (N->getOpcode() != ISD::SETCC)
3600 return false;
3601
3602 // Must be an equality comparison.
3603 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
3604 if (CCVal != ExpectedCCVal)
3605 return false;
3606
3607 SDValue LHS = N->getOperand(0);
3608 SDValue RHS = N->getOperand(1);
3609
3610 if (!LHS.getValueType().isScalarInteger())
3611 return false;
3612
3613 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
3614 if (isNullConstant(RHS)) {
3615 Val = LHS;
3616 return true;
3617 }
3618
3619 SDLoc DL(N);
3620
3621 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
3622 int64_t CVal = C->getSExtValue();
3623 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3624 // non-zero otherwise.
3625 if (CVal == -2048) {
3626 Val = SDValue(
3627 CurDAG->getMachineNode(
3628 RISCV::XORI, DL, N->getValueType(0), LHS,
3629 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3630 0);
3631 return true;
3632 }
3633 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3634 // if the LHS is equal to the RHS and non-zero otherwise.
3635 if (isInt<12>(CVal) || CVal == 2048) {
3636 unsigned Opc = RISCV::ADDI;
3637 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3638 cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) {
3639 Opc = RISCV::ADDIW;
3640 LHS = LHS.getOperand(0);
3641 }
3642
3643 Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS,
3644 CurDAG->getSignedTargetConstant(
3645 -CVal, DL, N->getValueType(0))),
3646 0);
3647 return true;
3648 }
3649 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3650 Val = SDValue(
3651 CurDAG->getMachineNode(
3652 RISCV::BINVI, DL, N->getValueType(0), LHS,
3653 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3654 0);
3655 return true;
3656 }
3657 // Same as the addi case above but for larger immediates (signed 26-bit) use
3658 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3659 // anything which can be done with a single lui as it might be compressible.
3660 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
3661 (CVal & 0xFFF) != 0) {
3662 Val = SDValue(
3663 CurDAG->getMachineNode(
3664 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
3665 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
3666 0);
3667 return true;
3668 }
3669 }
3670
3671 // If nothing else we can XOR the LHS and RHS to produce zero if they are
3672 // equal and a non-zero value if they aren't.
3673 Val = SDValue(
3674 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3675 return true;
3676}
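// For example, (setne X, 5) is selected here as (addi tmp, X, -5): the result
// is zero exactly when X == 5, so the consumer only has to test the ADDI
// result against zero. A constant of -2048 uses XORI instead because +2048
// does not fit in the ADDI immediate.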
3677
3678bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3679 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3680 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3681 Val = N.getOperand(0);
3682 return true;
3683 }
3684
3685 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3686 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3687 return N;
3688
3689 SDValue N0 = N.getOperand(0);
3690 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3691 N.getConstantOperandVal(1) == ShiftAmt &&
3692 N0.getConstantOperandVal(1) == ShiftAmt)
3693 return N0.getOperand(0);
3694
3695 return N;
3696 };
3697
3698 MVT VT = N.getSimpleValueType();
3699 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3700 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3701 return true;
3702 }
3703
3704 return false;
3705}
3706
3707bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3708 if (N.getOpcode() == ISD::AND) {
3709 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3710 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3711 Val = N.getOperand(0);
3712 return true;
3713 }
3714 }
3715 MVT VT = N.getSimpleValueType();
3716 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3717 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3718 Val = N;
3719 return true;
3720 }
3721
3722 return false;
3723}
3724
3725/// Look for various patterns that can be done with a SHL that can be folded
3726/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3727/// SHXADD we are trying to match.
3728bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3729 SDValue &Val) {
3730 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3731 SDValue N0 = N.getOperand(0);
3732
3733 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3734 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3735 isa<ConstantSDNode>(N0.getOperand(1))) {
3736 uint64_t Mask = N.getConstantOperandVal(1);
3737 unsigned C2 = N0.getConstantOperandVal(1);
3738
3739 unsigned XLen = Subtarget->getXLen();
3740 if (LeftShift)
3741 Mask &= maskTrailingZeros<uint64_t>(C2);
3742 else
3743 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3744
3745 if (isShiftedMask_64(Mask)) {
3746 unsigned Leading = XLen - llvm::bit_width(Mask);
3747 unsigned Trailing = llvm::countr_zero(Mask);
3748 if (Trailing != ShAmt)
3749 return false;
3750
3751 unsigned Opcode;
3752 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3753 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3754 // followed by a SHXADD with c3 for the X amount.
3755 if (LeftShift && Leading == 0 && C2 < Trailing)
3756 Opcode = RISCV::SRLI;
3757 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
3758 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
3759 // followed by a SHXADD with c3 for the X amount.
3760 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
3761 Opcode = RISCV::SRLIW;
3762 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3763 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3764 // followed by a SHXADD using c3 for the X amount.
3765 else if (!LeftShift && Leading == C2)
3766 Opcode = RISCV::SRLI;
3767 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
3768 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
3769 // followed by a SHXADD using c3 for the X amount.
3770 else if (!LeftShift && Leading == 32 + C2)
3771 Opcode = RISCV::SRLIW;
3772 else
3773 return false;
3774
3775 SDLoc DL(N);
3776 EVT VT = N.getValueType();
3777 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
3778 Val = SDValue(
3779 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
3780 CurDAG->getTargetConstant(ShAmt, DL, VT)),
3781 0);
3782 return true;
3783 }
3784 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3785 isa<ConstantSDNode>(N0.getOperand(1))) {
3786 uint64_t Mask = N.getConstantOperandVal(1);
3787 unsigned C2 = N0.getConstantOperandVal(1);
3788
3789 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3790 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3791 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3792 // the X amount.
3793 if (isShiftedMask_64(Mask)) {
3794 unsigned XLen = Subtarget->getXLen();
3795 unsigned Leading = XLen - llvm::bit_width(Mask);
3796 unsigned Trailing = llvm::countr_zero(Mask);
3797 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3798 SDLoc DL(N);
3799 EVT VT = N.getValueType();
3800 Val = SDValue(CurDAG->getMachineNode(
3801 RISCV::SRAI, DL, VT, N0.getOperand(0),
3802 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3803 0);
3804 Val = SDValue(CurDAG->getMachineNode(
3805 RISCV::SRLI, DL, VT, Val,
3806 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3807 0);
3808 return true;
3809 }
3810 }
3811 }
3812 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3813 (LeftShift || N.getOpcode() == ISD::SRL) &&
3814 isa<ConstantSDNode>(N.getOperand(1))) {
3815 SDValue N0 = N.getOperand(0);
3816 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3817 isa<ConstantSDNode>(N0.getOperand(1))) {
3818 uint64_t Mask = N0.getConstantOperandVal(1);
3819 if (isShiftedMask_64(Mask)) {
3820 unsigned C1 = N.getConstantOperandVal(1);
3821 unsigned XLen = Subtarget->getXLen();
3822 unsigned Leading = XLen - llvm::bit_width(Mask);
3823 unsigned Trailing = llvm::countr_zero(Mask);
3824 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3825 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3826 if (LeftShift && Leading == 32 && Trailing > 0 &&
3827 (Trailing + C1) == ShAmt) {
3828 SDLoc DL(N);
3829 EVT VT = N.getValueType();
3830 Val = SDValue(CurDAG->getMachineNode(
3831 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3832 CurDAG->getTargetConstant(Trailing, DL, VT)),
3833 0);
3834 return true;
3835 }
3836 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3837 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3838 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3839 (Trailing - C1) == ShAmt) {
3840 SDLoc DL(N);
3841 EVT VT = N.getValueType();
3842 Val = SDValue(CurDAG->getMachineNode(
3843 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3844 CurDAG->getTargetConstant(Trailing, DL, VT)),
3845 0);
3846 return true;
3847 }
3848 }
3849 }
3850 }
3851
3852 return false;
3853}
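// Worked example for the first pattern above, on RV64 with ShAmt == 3:
//   (and (shl y, 2), 0xFFFFFFFFFFFFFFF8)
// has a mask with no leading zeros and three trailing zeros, so it is
// rewritten to (srli y, 1) and the surrounding add can be selected as
//   sh3add dst, tmp, b        // tmp = srli y, 1
// avoiding an explicit AND with the mask constant.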
3854
3855/// Look for various patterns that can be done with a SHL that can be folded
3856/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3857/// SHXADD_UW we are trying to match.
3858bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3859 SDValue &Val) {
3860 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3861 N.hasOneUse()) {
3862 SDValue N0 = N.getOperand(0);
3863 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3864 N0.hasOneUse()) {
3865 uint64_t Mask = N.getConstantOperandVal(1);
3866 unsigned C2 = N0.getConstantOperandVal(1);
3867
3868 Mask &= maskTrailingZeros<uint64_t>(C2);
3869
3870 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3871 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3872 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3873 if (isShiftedMask_64(Mask)) {
3874 unsigned Leading = llvm::countl_zero(Mask);
3875 unsigned Trailing = llvm::countr_zero(Mask);
3876 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3877 SDLoc DL(N);
3878 EVT VT = N.getValueType();
3879 Val = SDValue(CurDAG->getMachineNode(
3880 RISCV::SLLI, DL, VT, N0.getOperand(0),
3881 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3882 0);
3883 return true;
3884 }
3885 }
3886 }
3887 }
3888
3889 return false;
3890}
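// Worked example for the pattern above, on RV64 with ShAmt == 2 (SH2ADD_UW):
//   (and (shl y, 4), 0x3FFFFFFF0)
// has 30 leading zeros (32 - ShAmt) and 4 trailing zeros (== c2), so it is
// rewritten to (slli y, 2); sh2add.uw then zero-extends the low 32 bits and
// shifts by 2, reproducing bits 4..33 of the original value in the address.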
3891
3892bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
3893 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
3894 if (N->getFlags().hasDisjoint())
3895 return true;
3896 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
3897}
3898
3899bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
3900 SDValue N, SDValue &Val) {
3901 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
3902 /*CompressionCost=*/true);
3903 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3904 /*CompressionCost=*/true);
3905 if (OrigCost <= Cost)
3906 return false;
3907
3908 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
3909 return true;
3910}
3911
3912bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
3913 if (!isa<ConstantSDNode>(N))
3914 return false;
3915 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3916 if ((Imm >> 31) != 1)
3917 return false;
3918
3919 for (const SDNode *U : N->users()) {
3920 switch (U->getOpcode()) {
3921 case ISD::ADD:
3922 break;
3923 case ISD::OR:
3924 if (orDisjoint(U))
3925 break;
3926 return false;
3927 default:
3928 return false;
3929 }
3930 }
3931
3932 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
3933}
3934
3935bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
3936 if (!isa<ConstantSDNode>(N))
3937 return false;
3938 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3939 if (isInt<32>(Imm))
3940 return false;
3941
3942 for (const SDNode *U : N->users()) {
3943 switch (U->getOpcode()) {
3944 case ISD::ADD:
3945 break;
3946 case RISCVISD::VMV_V_X_VL:
3947 if (!all_of(U->users(), [](const SDNode *V) {
3948 return V->getOpcode() == ISD::ADD ||
3949 V->getOpcode() == RISCVISD::ADD_VL;
3950 }))
3951 return false;
3952 break;
3953 default:
3954 return false;
3955 }
3956 }
3957
3958 return selectImm64IfCheaper(-Imm, Imm, N, Val);
3959}
3960
3961bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
3962 if (!isa<ConstantSDNode>(N))
3963 return false;
3964 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3965
3966 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
3967 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
3968 return false;
3969
3970 // Abandon this transform if the constant is needed elsewhere.
3971 for (const SDNode *U : N->users()) {
3972 switch (U->getOpcode()) {
3973 case ISD::AND:
3974 case ISD::OR:
3975 case ISD::XOR:
3976 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
3977 return false;
3978 break;
3979 case RISCVISD::VMV_V_X_VL:
3980 if (!Subtarget->hasStdExtZvkb())
3981 return false;
3982 if (!all_of(U->users(), [](const SDNode *V) {
3983 return V->getOpcode() == ISD::AND ||
3984 V->getOpcode() == RISCVISD::AND_VL;
3985 }))
3986 return false;
3987 break;
3988 default:
3989 return false;
3990 }
3991 }
3992
3993 if (isInt<32>(Imm)) {
3994 Val =
3995 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
3996 return true;
3997 }
3998
3999 // For 64-bit constants, the instruction sequences get complex,
4000 // so we select inverted only if it's cheaper.
4001 return selectImm64IfCheaper(~Imm, Imm, N, Val);
4002}
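// For example, with Zbb available, (and X, 0xffffffffffff0fff) would normally
// need LUI+ADDI to build the mask; selecting the inverted constant 0xf000
// instead needs a single LUI and the AND is matched as ANDN:
//   lui   a1, 0xf
//   andn  a0, a0, a1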
4003
4004static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
4005 unsigned Bits,
4006 const TargetInstrInfo *TII) {
4007 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
4008
4009 if (!MCOpcode)
4010 return false;
4011
4012 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
4013 const uint64_t TSFlags = MCID.TSFlags;
4014 if (!RISCVII::hasSEWOp(TSFlags))
4015 return false;
4016 assert(RISCVII::hasVLOp(TSFlags));
4017
4018 unsigned ChainOpIdx = User->getNumOperands() - 1;
4019 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
4020 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
4021 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
4022 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
4023
4024 if (UserOpNo == VLIdx)
4025 return false;
4026
4027 auto NumDemandedBits =
4028 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
4029 return NumDemandedBits && Bits >= *NumDemandedBits;
4030}
4031
4032// Return true if all users of this SDNode* only consume the lower \p Bits.
4033// This can be used to form W instructions for add/sub/mul/shl even when the
4034// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
4035// SimplifyDemandedBits has made it so some users see a sext_inreg and some
4036// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
4037// the add/sub/mul/shl to become non-W instructions. By checking the users we
4038// may be able to use a W instruction and CSE with the other instruction if
4039// this has happened. We could try to detect that the CSE opportunity exists
4040// before doing this, but that would be more complicated.
4041bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
4042 const unsigned Depth) const {
4043 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
4044 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
4045 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
4046 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
4047 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
4048 isa<ConstantSDNode>(Node) || Depth != 0) &&
4049 "Unexpected opcode");
4050
4051 if (Depth >= SelectionDAG::MaxRecursionDepth)
4052 return false;
4053
4054 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
4055 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
4056 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
4057 return false;
4058
4059 for (SDUse &Use : Node->uses()) {
4060 SDNode *User = Use.getUser();
4061 // Users of this node should have already been instruction selected
4062 if (!User->isMachineOpcode())
4063 return false;
4064
4065 // TODO: Add more opcodes?
4066 switch (User->getMachineOpcode()) {
4067 default:
4068 if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
4069 break;
4070 return false;
4071 case RISCV::ADDW:
4072 case RISCV::ADDIW:
4073 case RISCV::SUBW:
4074 case RISCV::MULW:
4075 case RISCV::SLLW:
4076 case RISCV::SLLIW:
4077 case RISCV::SRAW:
4078 case RISCV::SRAIW:
4079 case RISCV::SRLW:
4080 case RISCV::SRLIW:
4081 case RISCV::DIVW:
4082 case RISCV::DIVUW:
4083 case RISCV::REMW:
4084 case RISCV::REMUW:
4085 case RISCV::ROLW:
4086 case RISCV::RORW:
4087 case RISCV::RORIW:
4088 case RISCV::CLZW:
4089 case RISCV::CTZW:
4090 case RISCV::CPOPW:
4091 case RISCV::SLLI_UW:
4092 case RISCV::ABSW:
4093 case RISCV::FMV_W_X:
4094 case RISCV::FCVT_H_W:
4095 case RISCV::FCVT_H_W_INX:
4096 case RISCV::FCVT_H_WU:
4097 case RISCV::FCVT_H_WU_INX:
4098 case RISCV::FCVT_S_W:
4099 case RISCV::FCVT_S_W_INX:
4100 case RISCV::FCVT_S_WU:
4101 case RISCV::FCVT_S_WU_INX:
4102 case RISCV::FCVT_D_W:
4103 case RISCV::FCVT_D_W_INX:
4104 case RISCV::FCVT_D_WU:
4105 case RISCV::FCVT_D_WU_INX:
4106 case RISCV::TH_REVW:
4107 case RISCV::TH_SRRIW:
4108 if (Bits >= 32)
4109 break;
4110 return false;
4111 case RISCV::SLL:
4112 case RISCV::SRA:
4113 case RISCV::SRL:
4114 case RISCV::ROL:
4115 case RISCV::ROR:
4116 case RISCV::BSET:
4117 case RISCV::BCLR:
4118 case RISCV::BINV:
4119 // Shift amount operands only use log2(Xlen) bits.
4120 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
4121 break;
4122 return false;
4123 case RISCV::SLLI:
4124 // SLLI only uses the lower (XLen - ShAmt) bits.
4125 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
4126 break;
4127 return false;
4128 case RISCV::ANDI:
4129 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
4130 break;
4131 goto RecCheck;
4132 case RISCV::ORI: {
4133 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
4134 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
4135 break;
4136 [[fallthrough]];
4137 }
4138 case RISCV::AND:
4139 case RISCV::OR:
4140 case RISCV::XOR:
4141 case RISCV::XORI:
4142 case RISCV::ANDN:
4143 case RISCV::ORN:
4144 case RISCV::XNOR:
4145 case RISCV::SH1ADD:
4146 case RISCV::SH2ADD:
4147 case RISCV::SH3ADD:
4148 RecCheck:
4149 if (hasAllNBitUsers(User, Bits, Depth + 1))
4150 break;
4151 return false;
4152 case RISCV::SRLI: {
4153 unsigned ShAmt = User->getConstantOperandVal(1);
4154 // If we are shifting right by less than Bits, and users don't demand any
4155 // bits that were shifted into [Bits-1:0], then we can consider this as an
4156 // N-Bit user.
4157 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
4158 break;
4159 return false;
4160 }
4161 case RISCV::SEXT_B:
4162 case RISCV::PACKH:
4163 if (Bits >= 8)
4164 break;
4165 return false;
4166 case RISCV::SEXT_H:
4167 case RISCV::FMV_H_X:
4168 case RISCV::ZEXT_H_RV32:
4169 case RISCV::ZEXT_H_RV64:
4170 case RISCV::PACKW:
4171 if (Bits >= 16)
4172 break;
4173 return false;
4174 case RISCV::PACK:
4175 if (Bits >= (Subtarget->getXLen() / 2))
4176 break;
4177 return false;
4178 case RISCV::ADD_UW:
4179 case RISCV::SH1ADD_UW:
4180 case RISCV::SH2ADD_UW:
4181 case RISCV::SH3ADD_UW:
4182 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4183 // 32 bits.
4184 if (Use.getOperandNo() == 0 && Bits >= 32)
4185 break;
4186 return false;
4187 case RISCV::SB:
4188 if (Use.getOperandNo() == 0 && Bits >= 8)
4189 break;
4190 return false;
4191 case RISCV::SH:
4192 if (Use.getOperandNo() == 0 && Bits >= 16)
4193 break;
4194 return false;
4195 case RISCV::SW:
4196 if (Use.getOperandNo() == 0 && Bits >= 32)
4197 break;
4198 return false;
4199 case RISCV::TH_EXT:
4200 case RISCV::TH_EXTU: {
4201 unsigned Msb = User->getConstantOperandVal(1);
4202 unsigned Lsb = User->getConstantOperandVal(2);
4203 // Behavior of Msb < Lsb is not well documented.
4204 if (Msb >= Lsb && Bits > Msb)
4205 break;
4206 return false;
4207 }
4208 }
4209 }
4210
4211 return true;
4212}
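// For example, an i64 (add X, Y) that is used only as the stored value of an
// SW and as an input to an ADDW has all users consuming just the low 32 bits,
// so the add itself can be selected as ADDW and may then CSE with the
// existing ADDW.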
4213
4214// Select a constant that can be represented as (sign_extend(imm5) << imm2).
4215bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
4216 SDValue &Shl2) {
4217 auto *C = dyn_cast<ConstantSDNode>(N);
4218 if (!C)
4219 return false;
4220
4221 int64_t Offset = C->getSExtValue();
4222 for (unsigned Shift = 0; Shift < 4; Shift++) {
4223 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4224 EVT VT = N->getValueType(0);
4225 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
4226 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
4227 return true;
4228 }
4229 }
4230
4231 return false;
4232}
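// For example, 48 is matched as (12 << 2) with Simm5 == 12 and Shl2 == 2, and
// -64 as (-16 << 2); 47 is rejected because no shift of 0-3 leaves a simm5
// with the low bits all zero.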
4233
4234// Select VL as a 5 bit immediate or a value that will become a register. This
4235// allows us to choose between VSETIVLI or VSETVLI later.
4236bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
4237 auto *C = dyn_cast<ConstantSDNode>(N);
4238 if (C && isUInt<5>(C->getZExtValue())) {
4239 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
4240 N->getValueType(0));
4241 } else if (C && C->isAllOnes()) {
4242 // Treat all ones as VLMax.
4243 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4244 N->getValueType(0));
4245 } else if (isa<RegisterSDNode>(N) &&
4246 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
4247 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4248 // as the register class. Convert X0 to a special immediate to pass the
4249 // MachineVerifier. This is recognized specially by the vsetvli insertion
4250 // pass.
4251 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4252 N->getValueType(0));
4253 } else {
4254 VL = N;
4255 }
4256
4257 return true;
4258}
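// For example, a constant VL of 8 stays an immediate so the later code can
// emit VSETIVLI, while an all-ones constant or the X0 register is
// canonicalized to the VLMaxSentinel immediate that the vsetvli insertion
// pass recognizes.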
4259
4260static SDValue findVSplat(SDValue N) {
4261 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4262 if (!N.getOperand(0).isUndef())
4263 return SDValue();
4264 N = N.getOperand(1);
4265 }
4266 SDValue Splat = N;
4267 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4268 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4269 !Splat.getOperand(0).isUndef())
4270 return SDValue();
4271 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4272 return Splat;
4273}
4274
4275bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
4276 SDValue Splat = findVSplat(N);
4277 if (!Splat)
4278 return false;
4279
4280 SplatVal = Splat.getOperand(1);
4281 return true;
4282}
4283
4284static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
4285 SelectionDAG &DAG,
4286 const RISCVSubtarget &Subtarget,
4287 std::function<bool(int64_t)> ValidateImm,
4288 bool Decrement = false) {
4289 SDValue Splat = findVSplat(N);
4290 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
4291 return false;
4292
4293 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4294 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4295 "Unexpected splat operand type");
4296
4297 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4298 // type is wider than the resulting vector element type: an implicit
4299 // truncation first takes place. Therefore, perform a manual
4300 // truncation/sign-extension in order to ignore any truncated bits and catch
4301 // any zero-extended immediate.
4302 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4303 // sign-extending to (XLenVT -1).
4304 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
4305
4306 int64_t SplatImm = SplatConst.getSExtValue();
4307
4308 if (!ValidateImm(SplatImm))
4309 return false;
4310
4311 if (Decrement)
4312 SplatImm -= 1;
4313
4314 SplatVal =
4315 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
4316 return true;
4317}
4318
4319bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
4320 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
4321 [](int64_t Imm) { return isInt<5>(Imm); });
4322}
4323
4324bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
4325 return selectVSplatImmHelper(
4326 N, SplatVal, *CurDAG, *Subtarget,
4327 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4328 /*Decrement=*/true);
4329}
4330
4331bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
4332 return selectVSplatImmHelper(
4333 N, SplatVal, *CurDAG, *Subtarget,
4334 [](int64_t Imm) { return Imm >= -15 && Imm <= 16; },
4335 /*Decrement=*/false);
4336}
4337
4338bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
4339 SDValue &SplatVal) {
4340 return selectVSplatImmHelper(
4341 N, SplatVal, *CurDAG, *Subtarget,
4342 [](int64_t Imm) { return Imm != 0 && Imm >= -15 && Imm <= 16; },
4343 /*Decrement=*/true);
4344}
4345
4346bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
4347 SDValue &SplatVal) {
4348 return selectVSplatImmHelper(
4349 N, SplatVal, *CurDAG, *Subtarget,
4350 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
4351}
4352
4353bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
4354 SDValue Splat = findVSplat(N);
4355 return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
4356}
4357
4358bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
4359 auto IsExtOrTrunc = [](SDValue N) {
4360 switch (N->getOpcode()) {
4361 case ISD::SIGN_EXTEND:
4362 case ISD::ZERO_EXTEND:
4363 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4364 // inactive elements will be undef.
4365 case RISCVISD::TRUNCATE_VECTOR_VL:
4366 case RISCVISD::VSEXT_VL:
4367 case RISCVISD::VZEXT_VL:
4368 return true;
4369 default:
4370 return false;
4371 }
4372 };
4373
4374 // We can have multiple nested nodes, so unravel them all if needed.
4375 while (IsExtOrTrunc(N)) {
4376 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4377 return false;
4378 N = N->getOperand(0);
4379 }
4380
4381 return selectVSplat(N, SplatVal);
4382}
4383
4384bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
4385 // Allow bitcasts from XLenVT -> FP.
4386 if (N.getOpcode() == ISD::BITCAST &&
4387 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
4388 Imm = N.getOperand(0);
4389 return true;
4390 }
4391 // Allow moves from XLenVT to FP.
4392 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4393 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4394 Imm = N.getOperand(0);
4395 return true;
4396 }
4397
4398 // Otherwise, look for FP constants that can be materialized with scalar int.
4399 auto *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
4400 if (!CFP)
4401 return false;
4402 const APFloat &APF = CFP->getValueAPF();
4403 // td can handle +0.0 already.
4404 if (APF.isPosZero())
4405 return false;
4406
4407 MVT VT = CFP->getSimpleValueType(0);
4408
4409 MVT XLenVT = Subtarget->getXLenVT();
4410 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4411 assert(APF.isNegZero() && "Unexpected constant.");
4412 return false;
4413 }
4414 SDLoc DL(N);
4415 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
4416 *Subtarget);
4417 return true;
4418}
4419
4420bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
4421 SDValue &Imm) {
4422 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4423 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4424
4425 if (!isInt<5>(ImmVal))
4426 return false;
4427
4428 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4429 Subtarget->getXLenVT());
4430 return true;
4431 }
4432
4433 return false;
4434}
4435
4436// Try to remove sext.w if the input is a W instruction or can be made into
4437// a W instruction cheaply.
4438bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4439 // Look for the sext.w pattern, addiw rd, rs1, 0.
4440 if (N->getMachineOpcode() != RISCV::ADDIW ||
4441 !isNullConstant(N->getOperand(1)))
4442 return false;
4443
4444 SDValue N0 = N->getOperand(0);
4445 if (!N0.isMachineOpcode())
4446 return false;
4447
4448 switch (N0.getMachineOpcode()) {
4449 default:
4450 break;
4451 case RISCV::ADD:
4452 case RISCV::ADDI:
4453 case RISCV::SUB:
4454 case RISCV::MUL:
4455 case RISCV::SLLI: {
4456 // Convert sext.w+add/sub/mul to their W instructions. This will create
4457 // a new independent instruction. This improves latency.
4458 unsigned Opc;
4459 switch (N0.getMachineOpcode()) {
4460 default:
4461 llvm_unreachable("Unexpected opcode!");
4462 case RISCV::ADD: Opc = RISCV::ADDW; break;
4463 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4464 case RISCV::SUB: Opc = RISCV::SUBW; break;
4465 case RISCV::MUL: Opc = RISCV::MULW; break;
4466 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4467 }
4468
4469 SDValue N00 = N0.getOperand(0);
4470 SDValue N01 = N0.getOperand(1);
4471
4472 // Shift amount needs to be uimm5.
4473 if (N0.getMachineOpcode() == RISCV::SLLI &&
4474 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4475 break;
4476
4477 SDNode *Result =
4478 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4479 N00, N01);
4480 ReplaceUses(N, Result);
4481 return true;
4482 }
4483 case RISCV::ADDW:
4484 case RISCV::ADDIW:
4485 case RISCV::SUBW:
4486 case RISCV::MULW:
4487 case RISCV::SLLIW:
4488 case RISCV::PACKW:
4489 case RISCV::TH_MULAW:
4490 case RISCV::TH_MULAH:
4491 case RISCV::TH_MULSW:
4492 case RISCV::TH_MULSH:
4493 if (N0.getValueType() == MVT::i32)
4494 break;
4495
4496 // Result is already sign extended just remove the sext.w.
4497 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4498 ReplaceUses(N, N0.getNode());
4499 return true;
4500 }
4501
4502 return false;
4503}
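// For example, for the pair
//   add   a0, a1, a2
//   addiw a3, a0, 0
// the peephole emits "addw a3, a1, a2" in place of the sext.w, leaving the
// original add only if it still has other users.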
4504
4505static bool usesAllOnesMask(SDValue MaskOp) {
4506 const auto IsVMSet = [](unsigned Opc) {
4507 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4508 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4509 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4510 Opc == RISCV::PseudoVMSET_M_B8;
4511 };
4512
4513 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4514 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4515 // assume that it's all-ones? Same applies to its VL.
4516 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4517}
4518
4519static bool isImplicitDef(SDValue V) {
4520 if (!V.isMachineOpcode())
4521 return false;
4522 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4523 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4524 if (!isImplicitDef(V.getOperand(I)))
4525 return false;
4526 return true;
4527 }
4528 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4529}
4530
4531// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4532// corresponding "unmasked" pseudo versions.
4533bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4534 const RISCV::RISCVMaskedPseudoInfo *I =
4535 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4536 if (!I)
4537 return false;
4538
4539 unsigned MaskOpIdx = I->MaskOpIdx;
4540 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4541 return false;
4542
4543 // There are two classes of pseudos in the table - compares and
4544 // everything else. See the comment on RISCVMaskedPseudo for details.
4545 const unsigned Opc = I->UnmaskedPseudo;
4546 const MCInstrDesc &MCID = TII->get(Opc);
4547 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4548
4549 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4550 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4551
4552 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4553 !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
4554 "Unmasked pseudo has policy but masked pseudo doesn't?");
4555 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4556 "Unexpected pseudo structure");
4557 assert(!(HasPassthru && !MaskedHasPassthru) &&
4558 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4559
4560 SmallVector<SDValue, 8> Ops;
4561 // Skip the passthru operand at index 0 if the unmasked don't have one.
4562 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4563 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4564 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
4565 bool HasChainOp =
4566 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
4567 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4568 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4569 // Skip the mask
4570 SDValue Op = N->getOperand(I);
4571 if (I == MaskOpIdx)
4572 continue;
4573 if (DropPolicy && I == LastOpNum)
4574 continue;
4575 Ops.push_back(Op);
4576 }
4577
4578 MachineSDNode *Result =
4579 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4580
4581 if (!N->memoperands_empty())
4582 CurDAG->setNodeMemRefs(Result, N->memoperands());
4583
4584 Result->setFlags(N->getFlags());
4585 ReplaceUses(N, Result);
4586
4587 return true;
4588}
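// For example, a PseudoVADD_VV_M1_MASK whose mask operand comes from a
// PseudoVMSET_M_B* is rewritten to the unmasked PseudoVADD_VV_M1, dropping the
// mask operand (and, when the unmasked form has none, the passthru and policy
// operands as well).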
4589
4590/// If our passthru is an implicit_def, use noreg instead. This side
4591/// steps issues with MachineCSE not being able to CSE expressions with
4592/// IMPLICIT_DEF operands while preserving the semantic intent. See
4593/// pr64282 for context. Note that this transform is the last one
4594/// performed at ISEL DAG to DAG.
4595bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4596 bool MadeChange = false;
4597 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4598
4599 while (Position != CurDAG->allnodes_begin()) {
4600 SDNode *N = &*--Position;
4601 if (N->use_empty() || !N->isMachineOpcode())
4602 continue;
4603
4604 const unsigned Opc = N->getMachineOpcode();
4605 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4606 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4607 !isImplicitDef(N->getOperand(0)))
4608 continue;
4609
4610 SmallVector<SDValue> Ops;
4611 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4612 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4613 SDValue Op = N->getOperand(I);
4614 Ops.push_back(Op);
4615 }
4616
4617 MachineSDNode *Result =
4618 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4619 Result->setFlags(N->getFlags());
4620 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4621 ReplaceUses(N, Result);
4622 MadeChange = true;
4623 }
4624 return MadeChange;
4625}
4626
4627
4628// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4629// for instruction scheduling.
4630FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4631 CodeGenOptLevel OptLevel) {
4632 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4633}
4634
4636
4641
Definition ISDOpcodes.h:53
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
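generateInstSeq() is how the selector and lowering code ask what scalar instruction sequence a constant needs. A minimal sketch, assuming an MCSubtargetInfo for the target is already available; the header path used below is the one conventionally used inside the RISC-V backend and is an assumption here:
#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/MC/MCSubtargetInfo.h"
// Rough materialization cost of a 64-bit constant: the number of scalar
// instructions in the sequence RISCVMatInt would emit.
static unsigned materializationCost(int64_t Val,
                                    const llvm::MCSubtargetInfo &STI) {
  llvm::RISCVMatInt::InstSeq Seq = llvm::RISCVMatInt::generateInstSeq(Val, STI);
  return Seq.size();
}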
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI unsigned encodeXSfmmVType(unsigned SEW, unsigned Widen, bool AltFmt)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
LLVM_ABI unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul)
static unsigned decodeTWiden(unsigned TWiden)
LLVM_ABI unsigned encodeVTYPE(VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic, bool AltFmt=false)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
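RVVBitsPerBlock and the SEW/LMUL helpers above all revolve around the same piece of RVV arithmetic: VLMAX = (VLEN / SEW) * LMUL, and two types with the same SEW/LMUL ratio share the same VLMAX. A worked example in plain C++ (ordinary arithmetic following the RVV spec, not the LLVM API; integer LMUL only, fractional LMUL divides instead):
// VLMAX for a given vector register width VLenBits, element width SEW (bits),
// and register-group multiplier LMUL.
constexpr unsigned vlmax(unsigned VLenBits, unsigned SEW, unsigned LMul) {
  return (VLenBits / SEW) * LMul;
}
static_assert(vlmax(128, 32, 1) == 4, "four 32-bit lanes per 128-bit register");
static_assert(vlmax(128, 64, 2) == 4, "same SEW/LMUL ratio (32) => same VLMAX");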
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
static const MachineMemOperand::Flags MONontemporalBit1
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
constexpr bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
unsigned Log2_64(uint64_t Value)
Return the floor of the base-2 logarithm of the specified value, or -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count the number of 0's from the least significant bit upward, stopping at the first 1.
Definition bit.h:202
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
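Several of the bit-level helpers above are how the selector reasons about shift amounts and contiguous masks. A minimal sketch with illustrative values (the helpers are the real llvm/ADT/bit.h and llvm/Support/MathExtras.h APIs; the sample function is not from this file):
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
void bitHelperExamples() {
  assert(llvm::countr_zero(0x28u) == 3);   // 0b101000 has three trailing zeros
  assert(llvm::countr_one(0x0Fu) == 4);    // 0b1111 has four trailing ones
  assert(llvm::bit_width(255u) == 8);      // 255 needs 8 bits
  assert(llvm::Log2_64(1024) == 10);       // floor(log2(1024))
  assert(llvm::isShiftedMask_64(0x0FF0));  // a contiguous run of ones, shifted up
  assert(!llvm::isShiftedMask_64(0));      // the empty mask does not count
}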
unsigned M1(unsigned Val)
Definition VE.h:377
unsigned Log2_32(uint32_t Value)
Return the floor of the base-2 logarithm of the specified value, or -1 if the value is zero.
Definition MathExtras.h:331
int countl_zero(T Val)
Count the number of 0's from the most significant bit downward, stopping at the first 1.
Definition bit.h:236
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
constexpr T maskTrailingZeros(unsigned N)
Create a bitmask with the N right-most bits set to 0, and all other bits set to 1.
Definition MathExtras.h:94
@ Add
Sum of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
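createRISCVISelDag() is the factory for the legacy-PM pass that wraps this selector. A one-line sketch of how a target pass config typically registers it (the surrounding addInstSelector() hook and the TM variable name are assumed context, not code from this file):
// Inside the target's addInstSelector() override:
addPass(createRISCVISelDag(TM, getOptLevel()));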
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
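The mask and sign-extension helpers above are the building blocks of the 12-bit immediate and bitfield checks in this selector. A minimal sketch with illustrative values; these helpers are constexpr, so the expectations can be spelled as static_asserts:
#include "llvm/Support/MathExtras.h"
#include <cstdint>
static_assert(llvm::maskTrailingOnes<uint64_t>(12) == 0xFFF);
static_assert(llvm::maskTrailingZeros<uint64_t>(12) == ~uint64_t(0xFFF));
static_assert(llvm::isMask_64(0xFFF) && !llvm::isMask_64(0xFF0));
static_assert(llvm::isUInt<12>(2047) && !llvm::isUInt<12>(4096));
static_assert(llvm::SignExtend64<12>(0xFFF) == -1); // low 12 bits with the sign bit set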
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueTypes that has been interned by a SelectionDAG.