1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "MCTargetDesc/RISCVMCTargetDesc.h"
16#include "MCTargetDesc/RISCVMatInt.h"
17#include "RISCVISelLowering.h"
18#include "RISCVMachineFunctionInfo.h"
19#include "llvm/CodeGen/MachineFrameInfo.h"
20#include "llvm/IR/IntrinsicsRISCV.h"
21#include "llvm/Support/Alignment.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/MathExtras.h"
24#include "llvm/Support/raw_ostream.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-isel"
29#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
32 "riscv-use-rematerializable-movimm", cl::Hidden,
33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34 "constant materialization"),
35 cl::init(false));
36
37namespace llvm::RISCV {
38#define GET_RISCVVSSEGTable_IMPL
39#define GET_RISCVVLSEGTable_IMPL
40#define GET_RISCVVLXSEGTable_IMPL
41#define GET_RISCVVSXSEGTable_IMPL
42#define GET_RISCVVLETable_IMPL
43#define GET_RISCVVSETable_IMPL
44#define GET_RISCVVLXTable_IMPL
45#define GET_RISCVVSXTable_IMPL
46#define GET_RISCVMaskedPseudosTable_IMPL
47#include "RISCVGenSearchableTables.inc"
48} // namespace llvm::RISCV
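// The GET_*_IMPL macros above expand the TableGen-generated lookup tables from
// RISCVGenSearchableTables.inc; they provide getVLSEGPseudo, getVSXSEGPseudo,
// getVLXTable, etc., which map NF/masked/strided/SEW/LMUL parameters to the
// vector load/store pseudo instructions selected below.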
49
50void RISCVDAGToDAGISel::PreprocessISelDAG() {
51 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
52
53 bool MadeChange = false;
54 while (Position != CurDAG->allnodes_begin()) {
55 SDNode *N = &*--Position;
56 if (N->use_empty())
57 continue;
58
59 SDValue Result;
60 switch (N->getOpcode()) {
61 case ISD::SPLAT_VECTOR: {
62 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
63 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
64 MVT VT = N->getSimpleValueType(0);
65 unsigned Opc =
66 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
67 SDLoc DL(N);
68 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
69 SDValue Src = N->getOperand(0);
70 if (VT.isInteger())
71 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
72 N->getOperand(0));
73 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
74 break;
75 }
76 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
77 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
78 // load. Done after lowering and combining so that we have a chance to
79 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
80 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
81 MVT VT = N->getSimpleValueType(0);
82 SDValue Passthru = N->getOperand(0);
83 SDValue Lo = N->getOperand(1);
84 SDValue Hi = N->getOperand(2);
85 SDValue VL = N->getOperand(3);
86 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
87 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
88 "Unexpected VTs!");
89 MachineFunction &MF = CurDAG->getMachineFunction();
90 SDLoc DL(N);
91
92 // Create temporary stack for each expanding node.
93 SDValue StackSlot =
94 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
95 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
96 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
97
98 SDValue Chain = CurDAG->getEntryNode();
99 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
100
101 SDValue OffsetSlot =
102 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
103 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
104 Align(8));
105
106 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
107
108 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
109 SDValue IntID =
110 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
111 SDValue Ops[] = {Chain,
112 IntID,
113 Passthru,
114 StackSlot,
115 CurDAG->getRegister(RISCV::X0, MVT::i64),
116 VL};
117
118 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
119 MVT::i64, MPI, Align(8),
120 MachineMemOperand::MOLoad);
121 break;
122 }
123 }
124
125 if (Result) {
126 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
127 LLVM_DEBUG(N->dump(CurDAG));
128 LLVM_DEBUG(dbgs() << "\nNew: ");
129 LLVM_DEBUG(Result->dump(CurDAG));
130 LLVM_DEBUG(dbgs() << "\n");
131
132 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
133 MadeChange = true;
134 }
135 }
136
137 if (MadeChange)
138 CurDAG->RemoveDeadNodes();
139}
140
141void RISCVDAGToDAGISel::PostprocessISelDAG() {
142 HandleSDNode Dummy(CurDAG->getRoot());
143 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
144
145 bool MadeChange = false;
146 while (Position != CurDAG->allnodes_begin()) {
147 SDNode *N = &*--Position;
148 // Skip dead nodes and any non-machine opcodes.
149 if (N->use_empty() || !N->isMachineOpcode())
150 continue;
151
152 MadeChange |= doPeepholeSExtW(N);
153
154 // FIXME: This is here only because the VMerge transform doesn't
155 // know how to handle masked true inputs. Once that has been moved
156 // to post-ISEL, this can be deleted as well.
157 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
158 }
159
160 CurDAG->setRoot(Dummy.getValue());
161
162 MadeChange |= doPeepholeMergeVVMFold();
163
164 // After we're done with everything else, convert IMPLICIT_DEF
165 // passthru operands to NoRegister. This is required to workaround
166 // an optimization deficiency in MachineCSE. This really should
167 // be merged back into each of the patterns (i.e. there's no good
168 // reason not to go directly to NoReg), but is being done this way
169 // to allow easy backporting.
170 MadeChange |= doPeepholeNoRegPassThru();
171
172 if (MadeChange)
173 CurDAG->RemoveDeadNodes();
174}
175
176static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
177 RISCVMatInt::InstSeq &Seq) {
178 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
179 for (const RISCVMatInt::Inst &Inst : Seq) {
180 SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
181 SDNode *Result = nullptr;
182 switch (Inst.getOpndKind()) {
183 case RISCVMatInt::Imm:
184 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
185 break;
186 case RISCVMatInt::RegX0:
187 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
188 CurDAG->getRegister(RISCV::X0, VT));
189 break;
190 case RISCVMatInt::RegReg:
191 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
192 break;
193 case RISCVMatInt::RegImm:
194 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
195 break;
196 }
197
198 // Only the first instruction has X0 as its source.
199 SrcReg = SDValue(Result, 0);
200 }
201
202 return SrcReg;
203}
204
205static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
206 int64_t Imm, const RISCVSubtarget &Subtarget) {
207 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
208
209 // Use a rematerializable pseudo instruction for short sequences if enabled.
210 if (Seq.size() == 2 && UsePseudoMovImm)
211 return SDValue(
212 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
213 CurDAG->getTargetConstant(Imm, DL, VT)),
214 0);
215
216 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
217 // worst an LUI+ADDIW. This will require an extra register, but avoids a
218 // constant pool.
219 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
220 // low and high 32 bits are the same and bit 31 and 63 are set.
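// For instance, 0x0000123400001234 repeats its low 32 bits, so it can be built
// as X = 0x1234; X + (X << 32) instead of a longer LUI/ADDI/SLLI chain.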
221 if (Seq.size() > 3) {
222 unsigned ShiftAmt, AddOpc;
223 RISCVMatInt::InstSeq SeqLo =
224 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
225 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
226 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
227
228 SDValue SLLI = SDValue(
229 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
230 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
231 0);
232 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
233 }
234 }
235
236 // Otherwise, use the original sequence.
237 return selectImmSeq(CurDAG, DL, VT, Seq);
238}
239
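// Build a REG_SEQUENCE that packs NF vector registers into the tuple register
// class (VRN<NF>M<LMUL>) expected by the segment load/store pseudos.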
240static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
241 unsigned NF, RISCVII::VLMUL LMUL) {
242 static const unsigned M1TupleRegClassIDs[] = {
243 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
244 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
245 RISCV::VRN8M1RegClassID};
246 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
247 RISCV::VRN3M2RegClassID,
248 RISCV::VRN4M2RegClassID};
249
250 assert(Regs.size() >= 2 && Regs.size() <= 8);
251
252 unsigned RegClassID;
253 unsigned SubReg0;
254 switch (LMUL) {
255 default:
256 llvm_unreachable("Invalid LMUL.");
257 case RISCVII::VLMUL::LMUL_F8:
258 case RISCVII::VLMUL::LMUL_F4:
259 case RISCVII::VLMUL::LMUL_F2:
260 case RISCVII::VLMUL::LMUL_1:
261 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
262 "Unexpected subreg numbering");
263 SubReg0 = RISCV::sub_vrm1_0;
264 RegClassID = M1TupleRegClassIDs[NF - 2];
265 break;
266 case RISCVII::VLMUL::LMUL_2:
267 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
268 "Unexpected subreg numbering");
269 SubReg0 = RISCV::sub_vrm2_0;
270 RegClassID = M2TupleRegClassIDs[NF - 2];
271 break;
272 case RISCVII::VLMUL::LMUL_4:
273 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
274 "Unexpected subreg numbering");
275 SubReg0 = RISCV::sub_vrm4_0;
276 RegClassID = RISCV::VRN2M4RegClassID;
277 break;
278 }
279
280 SDLoc DL(Regs[0]);
281 SmallVector<SDValue, 8> Ops;
282
283 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
284
285 for (unsigned I = 0; I < Regs.size(); ++I) {
286 Ops.push_back(Regs[I]);
287 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
288 }
289 SDNode *N =
290 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
291 return SDValue(N, 0);
292}
293
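// Append the common operands of a vector load/store pseudo in order: base
// pointer, optional stride/index, optional mask (copied to V0), VL, SEW, a
// policy operand for loads, the chain, and glue if a mask copy was emitted.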
294void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
295 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
296 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
297 bool IsLoad, MVT *IndexVT) {
298 SDValue Chain = Node->getOperand(0);
299 SDValue Glue;
300
301 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
302
303 if (IsStridedOrIndexed) {
304 Operands.push_back(Node->getOperand(CurOp++)); // Index.
305 if (IndexVT)
306 *IndexVT = Operands.back()->getSimpleValueType(0);
307 }
308
309 if (IsMasked) {
310 // Mask needs to be copied to V0.
311 SDValue Mask = Node->getOperand(CurOp++);
312 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
313 Glue = Chain.getValue(1);
314 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
315 }
316 SDValue VL;
317 selectVLOp(Node->getOperand(CurOp++), VL);
318 Operands.push_back(VL);
319
320 MVT XLenVT = Subtarget->getXLenVT();
321 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
322 Operands.push_back(SEWOp);
323
324 // At the IR layer, all the masked load intrinsics have policy operands,
325 // none of the others do. All have passthru operands. For our pseudos,
326 // all loads have policy operands.
327 if (IsLoad) {
328 uint64_t Policy = RISCVII::MASK_AGNOSTIC;
329 if (IsMasked)
330 Policy = Node->getConstantOperandVal(CurOp++);
331 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
332 Operands.push_back(PolicyOp);
333 }
334
335 Operands.push_back(Chain); // Chain.
336 if (Glue)
337 Operands.push_back(Glue);
338}
339
340void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
341 bool IsStrided) {
342 SDLoc DL(Node);
343 unsigned NF = Node->getNumValues() - 1;
344 MVT VT = Node->getSimpleValueType(0);
345 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
346 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
347
348 unsigned CurOp = 2;
349 SmallVector<SDValue, 8> Operands;
350
351 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
352 Node->op_begin() + CurOp + NF);
353 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
354 Operands.push_back(Merge);
355 CurOp += NF;
356
357 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
358 Operands, /*IsLoad=*/true);
359
360 const RISCV::VLSEGPseudo *P =
361 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
362 static_cast<unsigned>(LMUL));
363 MachineSDNode *Load =
364 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
365
366 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
367 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
368
369 SDValue SuperReg = SDValue(Load, 0);
370 for (unsigned I = 0; I < NF; ++I) {
371 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
372 ReplaceUses(SDValue(Node, I),
373 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
374 }
375
376 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
377 CurDAG->RemoveDeadNode(Node);
378}
379
380void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
381 SDLoc DL(Node);
382 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
383 MVT VT = Node->getSimpleValueType(0);
384 MVT XLenVT = Subtarget->getXLenVT();
385 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
386 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
387
388 unsigned CurOp = 2;
389 SmallVector<SDValue, 7> Operands;
390
391 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
392 Node->op_begin() + CurOp + NF);
393 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
394 Operands.push_back(MaskedOff);
395 CurOp += NF;
396
397 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
398 /*IsStridedOrIndexed*/ false, Operands,
399 /*IsLoad=*/true);
400
401 const RISCV::VLSEGPseudo *P =
402 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
403 Log2SEW, static_cast<unsigned>(LMUL));
404 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
405 XLenVT, MVT::Other, Operands);
406
407 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
408 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
409
410 SDValue SuperReg = SDValue(Load, 0);
411 for (unsigned I = 0; I < NF; ++I) {
412 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
413 ReplaceUses(SDValue(Node, I),
414 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
415 }
416
417 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
418 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
419 CurDAG->RemoveDeadNode(Node);
420}
421
422void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
423 bool IsOrdered) {
424 SDLoc DL(Node);
425 unsigned NF = Node->getNumValues() - 1;
426 MVT VT = Node->getSimpleValueType(0);
427 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
428 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
429
430 unsigned CurOp = 2;
431 SmallVector<SDValue, 8> Operands;
432
433 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
434 Node->op_begin() + CurOp + NF);
435 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
436 Operands.push_back(MaskedOff);
437 CurOp += NF;
438
439 MVT IndexVT;
440 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
441 /*IsStridedOrIndexed*/ true, Operands,
442 /*IsLoad=*/true, &IndexVT);
443
445 "Element count mismatch");
446
447 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
448 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
449 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
450 report_fatal_error("The V extension does not support EEW=64 for index "
451 "values when XLEN=32");
452 }
453 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
454 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
455 static_cast<unsigned>(IndexLMUL));
456 MachineSDNode *Load =
457 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
458
459 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
460 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
461
462 SDValue SuperReg = SDValue(Load, 0);
463 for (unsigned I = 0; I < NF; ++I) {
464 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
465 ReplaceUses(SDValue(Node, I),
466 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
467 }
468
469 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
470 CurDAG->RemoveDeadNode(Node);
471}
472
473void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
474 bool IsStrided) {
475 SDLoc DL(Node);
476 unsigned NF = Node->getNumOperands() - 4;
477 if (IsStrided)
478 NF--;
479 if (IsMasked)
480 NF--;
481 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
482 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
483 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
484 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
485 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
486
487 SmallVector<SDValue, 8> Operands;
488 Operands.push_back(StoreVal);
489 unsigned CurOp = 2 + NF;
490
491 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
492 Operands);
493
494 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
495 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
496 MachineSDNode *Store =
497 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
498
499 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
500 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
501
502 ReplaceNode(Node, Store);
503}
504
505void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
506 bool IsOrdered) {
507 SDLoc DL(Node);
508 unsigned NF = Node->getNumOperands() - 5;
509 if (IsMasked)
510 --NF;
511 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
512 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
513 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
514 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
515 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
516
517 SmallVector<SDValue, 8> Operands;
518 Operands.push_back(StoreVal);
519 unsigned CurOp = 2 + NF;
520
521 MVT IndexVT;
522 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
523 /*IsStridedOrIndexed*/ true, Operands,
524 /*IsLoad=*/false, &IndexVT);
525
527 "Element count mismatch");
528
529 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
530 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
531 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
532 report_fatal_error("The V extension does not support EEW=64 for index "
533 "values when XLEN=32");
534 }
535 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
536 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
537 static_cast<unsigned>(IndexLMUL));
538 MachineSDNode *Store =
539 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
540
541 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
542 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
543
544 ReplaceNode(Node, Store);
545}
546
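// Lower the riscv.vsetvli / riscv.vsetvlimax intrinsics to PseudoVSETVLI,
// PseudoVSETIVLI (AVL fits in uimm5) or PseudoVSETVLIX0 (VLMAX request).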
547void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
548 if (!Subtarget->hasVInstructions())
549 return;
550
551 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
552
553 SDLoc DL(Node);
554 MVT XLenVT = Subtarget->getXLenVT();
555
556 unsigned IntNo = Node->getConstantOperandVal(0);
557
558 assert((IntNo == Intrinsic::riscv_vsetvli ||
559 IntNo == Intrinsic::riscv_vsetvlimax) &&
560 "Unexpected vsetvli intrinsic");
561
562 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
563 unsigned Offset = (VLMax ? 1 : 2);
564
565 assert(Node->getNumOperands() == Offset + 2 &&
566 "Unexpected number of operands");
567
568 unsigned SEW =
569 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
570 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
571 Node->getConstantOperandVal(Offset + 1) & 0x7);
572
573 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
574 /*MaskAgnostic*/ true);
575 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
576
577 SDValue VLOperand;
578 unsigned Opcode = RISCV::PseudoVSETVLI;
579 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
580 if (auto VLEN = Subtarget->getRealVLen())
581 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
582 VLMax = true;
583 }
584 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
585 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
586 Opcode = RISCV::PseudoVSETVLIX0;
587 } else {
588 VLOperand = Node->getOperand(1);
589
590 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
591 uint64_t AVL = C->getZExtValue();
592 if (isUInt<5>(AVL)) {
593 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
594 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
595 XLenVT, VLImm, VTypeIOp));
596 return;
597 }
598 }
599 }
600
601 ReplaceNode(Node,
602 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
603}
604
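// Try to rewrite (and/or/xor (shl X, C1), C2) as (shl (op X, C2 >> C1), C1) so
// the logic immediate fits ANDI/ORI/XORI; e.g. (xor (shl X, 4), 0x3000) becomes
// (slli (xori X, 0x300), 4).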
605bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
606 MVT VT = Node->getSimpleValueType(0);
607 unsigned Opcode = Node->getOpcode();
608 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
609 "Unexpected opcode");
610 SDLoc DL(Node);
611
612 // For operations of the form (x << C1) op C2, check if we can use
613 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
614 SDValue N0 = Node->getOperand(0);
615 SDValue N1 = Node->getOperand(1);
616
617 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
618 if (!Cst)
619 return false;
620
621 int64_t Val = Cst->getSExtValue();
622
623 // Check if immediate can already use ANDI/ORI/XORI.
624 if (isInt<12>(Val))
625 return false;
626
627 SDValue Shift = N0;
628
629 // If Val is simm32 and we have a sext_inreg from i32, then the binop
630 // produces at least 33 sign bits. We can peek through the sext_inreg and use
631 // a SLLIW at the end.
632 bool SignExt = false;
633 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
634 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
635 SignExt = true;
636 Shift = N0.getOperand(0);
637 }
638
639 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
640 return false;
641
642 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
643 if (!ShlCst)
644 return false;
645
646 uint64_t ShAmt = ShlCst->getZExtValue();
647
648 // Make sure that we don't change the operation by removing bits.
649 // This only matters for OR and XOR, AND is unaffected.
650 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
651 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
652 return false;
653
654 int64_t ShiftedVal = Val >> ShAmt;
655 if (!isInt<12>(ShiftedVal))
656 return false;
657
658 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
659 if (SignExt && ShAmt >= 32)
660 return false;
661
662 // Ok, we can reorder to get a smaller immediate.
663 unsigned BinOpc;
664 switch (Opcode) {
665 default: llvm_unreachable("Unexpected opcode");
666 case ISD::AND: BinOpc = RISCV::ANDI; break;
667 case ISD::OR: BinOpc = RISCV::ORI; break;
668 case ISD::XOR: BinOpc = RISCV::XORI; break;
669 }
670
671 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
672
673 SDNode *BinOp =
674 CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
675 CurDAG->getTargetConstant(ShiftedVal, DL, VT));
676 SDNode *SLLI =
677 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
678 CurDAG->getTargetConstant(ShAmt, DL, VT));
679 ReplaceNode(Node, SLLI);
680 return true;
681}
682
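// Match (sra (shl X, C1), C2) with C1 <= C2, or (sra (sext_inreg X, VT), C),
// as a TH.EXT signed bitfield extract when XTHeadBb is available.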
683bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
684 // Only supported with XTHeadBb at the moment.
685 if (!Subtarget->hasVendorXTHeadBb())
686 return false;
687
688 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
689 if (!N1C)
690 return false;
691
692 SDValue N0 = Node->getOperand(0);
693 if (!N0.hasOneUse())
694 return false;
695
696 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
697 MVT VT) {
698 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
699 CurDAG->getTargetConstant(Msb, DL, VT),
700 CurDAG->getTargetConstant(Lsb, DL, VT));
701 };
702
703 SDLoc DL(Node);
704 MVT VT = Node->getSimpleValueType(0);
705 const unsigned RightShAmt = N1C->getZExtValue();
706
707 // Transform (sra (shl X, C1) C2) with C1 < C2
708 // -> (TH.EXT X, msb, lsb)
709 if (N0.getOpcode() == ISD::SHL) {
710 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
711 if (!N01C)
712 return false;
713
714 const unsigned LeftShAmt = N01C->getZExtValue();
715 // Make sure that this is a bitfield extraction (i.e., the shift-right
716 // amount can not be less than the left-shift).
717 if (LeftShAmt > RightShAmt)
718 return false;
719
720 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
721 const unsigned Msb = MsbPlusOne - 1;
722 const unsigned Lsb = RightShAmt - LeftShAmt;
723
724 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
725 ReplaceNode(Node, TH_EXT);
726 return true;
727 }
728
729 // Transform (sra (sext_inreg X, _), C) ->
730 // (TH.EXT X, msb, lsb)
731 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
732 unsigned ExtSize =
733 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
734
735 // ExtSize of 32 should use sraiw via tablegen pattern.
736 if (ExtSize == 32)
737 return false;
738
739 const unsigned Msb = ExtSize - 1;
740 const unsigned Lsb = RightShAmt;
741
742 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
743 ReplaceNode(Node, TH_EXT);
744 return true;
745 }
746
747 return false;
748}
749
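// Select pre/post-indexed loads to the XTHeadMemIdx TH.L*IB/TH.L*IA forms when
// the offset can be encoded as (sign_extend(imm5) << imm2).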
750bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
751 // Target does not support indexed loads.
752 if (!Subtarget->hasVendorXTHeadMemIdx())
753 return false;
754
755 LoadSDNode *Ld = cast<LoadSDNode>(Node);
756 ISD::MemIndexedMode AM = Ld->getAddressingMode();
757 if (AM == ISD::UNINDEXED)
758 return false;
759
760 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
761 if (!C)
762 return false;
763
764 EVT LoadVT = Ld->getMemoryVT();
765 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
766 "Unexpected addressing mode");
767 bool IsPre = AM == ISD::PRE_INC;
768 bool IsPost = AM == ISD::POST_INC;
769 int64_t Offset = C->getSExtValue();
770
771 // The constants that can be encoded in the THeadMemIdx instructions
772 // are of the form (sign_extend(imm5) << imm2).
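// e.g. Offset == 48 is encodable with Shift == 2 (imm5 == 12), while
// Offset == 17 fails all four shift amounts and is rejected below.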
773 int64_t Shift;
774 for (Shift = 0; Shift < 4; Shift++)
775 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
776 break;
777
778 // Constant cannot be encoded.
779 if (Shift == 4)
780 return false;
781
782 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
783 unsigned Opcode;
784 if (LoadVT == MVT::i8 && IsPre)
785 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
786 else if (LoadVT == MVT::i8 && IsPost)
787 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
788 else if (LoadVT == MVT::i16 && IsPre)
789 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
790 else if (LoadVT == MVT::i16 && IsPost)
791 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
792 else if (LoadVT == MVT::i32 && IsPre)
793 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
794 else if (LoadVT == MVT::i32 && IsPost)
795 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
796 else if (LoadVT == MVT::i64 && IsPre)
797 Opcode = RISCV::TH_LDIB;
798 else if (LoadVT == MVT::i64 && IsPost)
799 Opcode = RISCV::TH_LDIA;
800 else
801 return false;
802
803 EVT Ty = Ld->getOffset().getValueType();
804 SDValue Ops[] = {Ld->getBasePtr(),
805 CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
806 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
807 Ld->getChain()};
808 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
809 Ld->getValueType(1), MVT::Other, Ops);
810
811 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
812 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
813
814 ReplaceNode(Node, New);
815
816 return true;
817}
818
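// Lower the SiFive sf.vc.x.se / sf.vc.i.se intrinsics to the PseudoVC_X_SE_* /
// PseudoVC_I_SE_* pseudo matching the LMUL value encoded in operand 7.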
819void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
820 if (!Subtarget->hasVInstructions())
821 return;
822
823 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
824
825 SDLoc DL(Node);
826 unsigned IntNo = Node->getConstantOperandVal(1);
827
828 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
829 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
830 "Unexpected vsetvli intrinsic");
831
832 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
833 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
834 SDValue SEWOp =
835 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
836 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
837 Node->getOperand(4), Node->getOperand(5),
838 Node->getOperand(8), SEWOp,
839 Node->getOperand(0)};
840
841 unsigned Opcode;
842 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
843 switch (LMulSDNode->getSExtValue()) {
844 case 5:
845 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
846 : RISCV::PseudoVC_I_SE_MF8;
847 break;
848 case 6:
849 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
850 : RISCV::PseudoVC_I_SE_MF4;
851 break;
852 case 7:
853 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
854 : RISCV::PseudoVC_I_SE_MF2;
855 break;
856 case 0:
857 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
858 : RISCV::PseudoVC_I_SE_M1;
859 break;
860 case 1:
861 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
862 : RISCV::PseudoVC_I_SE_M2;
863 break;
864 case 2:
865 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
866 : RISCV::PseudoVC_I_SE_M4;
867 break;
868 case 3:
869 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
870 : RISCV::PseudoVC_I_SE_M8;
871 break;
872 }
873
874 ReplaceNode(Node, CurDAG->getMachineNode(
875 Opcode, DL, Node->getSimpleValueType(0), Operands));
876}
877
878void RISCVDAGToDAGISel::Select(SDNode *Node) {
879 // If we have a custom node, we have already selected.
880 if (Node->isMachineOpcode()) {
881 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
882 Node->setNodeId(-1);
883 return;
884 }
885
886 // Instruction Selection not handled by the auto-generated tablegen selection
887 // should be handled here.
888 unsigned Opcode = Node->getOpcode();
889 MVT XLenVT = Subtarget->getXLenVT();
890 SDLoc DL(Node);
891 MVT VT = Node->getSimpleValueType(0);
892
893 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
894
895 switch (Opcode) {
896 case ISD::Constant: {
897 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
898 auto *ConstNode = cast<ConstantSDNode>(Node);
899 if (ConstNode->isZero()) {
900 SDValue New =
901 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
902 ReplaceNode(Node, New.getNode());
903 return;
904 }
905 int64_t Imm = ConstNode->getSExtValue();
906 // If the upper XLen-16 bits are not used, try to convert this to a simm12
907 // by sign extending bit 15.
908 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
909 hasAllHUsers(Node))
910 Imm = SignExtend64<16>(Imm);
911 // If the upper 32-bits are not used try to convert this into a simm32 by
912 // sign extending bit 32.
913 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
914 Imm = SignExtend64<32>(Imm);
915
916 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
917 return;
918 }
919 case ISD::ConstantFP: {
920 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
921 auto [FPImm, NeedsFNeg] =
922 static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
923 VT);
924 if (FPImm >= 0) {
925 unsigned Opc;
926 unsigned FNegOpc;
927 switch (VT.SimpleTy) {
928 default:
929 llvm_unreachable("Unexpected size");
930 case MVT::f16:
931 Opc = RISCV::FLI_H;
932 FNegOpc = RISCV::FSGNJN_H;
933 break;
934 case MVT::f32:
935 Opc = RISCV::FLI_S;
936 FNegOpc = RISCV::FSGNJN_S;
937 break;
938 case MVT::f64:
939 Opc = RISCV::FLI_D;
940 FNegOpc = RISCV::FSGNJN_D;
941 break;
942 }
943 SDNode *Res = CurDAG->getMachineNode(
944 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
945 if (NeedsFNeg)
946 Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
947 SDValue(Res, 0));
948
949 ReplaceNode(Node, Res);
950 return;
951 }
952
953 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
954 SDValue Imm;
955 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
956 // create an integer immediate.
957 if (APF.isPosZero() || NegZeroF64)
958 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
959 else
960 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
961 *Subtarget);
962
963 bool HasZdinx = Subtarget->hasStdExtZdinx();
964 bool Is64Bit = Subtarget->is64Bit();
965 unsigned Opc;
966 switch (VT.SimpleTy) {
967 default:
968 llvm_unreachable("Unexpected size");
969 case MVT::bf16:
970 assert(Subtarget->hasStdExtZfbfmin());
971 Opc = RISCV::FMV_H_X;
972 break;
973 case MVT::f16:
974 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
975 break;
976 case MVT::f32:
977 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
978 break;
979 case MVT::f64:
980 // For RV32, we can't move from a GPR, we need to convert instead. This
981 // should only happen for +0.0 and -0.0.
982 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
983 if (Is64Bit)
984 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
985 else
986 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
987 break;
988 }
989
990 SDNode *Res;
991 if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
992 Res = CurDAG->getMachineNode(
993 Opc, DL, VT, Imm,
994 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
995 else
996 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
997
998 // For f64 -0.0, we need to insert a fneg.d idiom.
999 if (NegZeroF64) {
1000 Opc = RISCV::FSGNJN_D;
1001 if (HasZdinx)
1002 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1003 Res =
1004 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1005 }
1006
1007 ReplaceNode(Node, Res);
1008 return;
1009 }
1010 case RISCVISD::SplitF64: {
1011 if (!Subtarget->hasStdExtZfa())
1012 break;
1013 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1014 "Unexpected subtarget");
1015
1016 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1017 if (!SDValue(Node, 0).use_empty()) {
1018 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1019 Node->getOperand(0));
1020 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1021 }
1022 if (!SDValue(Node, 1).use_empty()) {
1023 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1024 Node->getOperand(0));
1025 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1026 }
1027
1028 CurDAG->RemoveDeadNode(Node);
1029 return;
1030 }
1031 case ISD::SHL: {
1032 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1033 if (!N1C)
1034 break;
1035 SDValue N0 = Node->getOperand(0);
1036 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1037 !isa<ConstantSDNode>(N0.getOperand(1)))
1038 break;
1039 unsigned ShAmt = N1C->getZExtValue();
1040 uint64_t Mask = N0.getConstantOperandVal(1);
1041
1042 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
1043 // 32 leading zeros and C3 trailing zeros.
1044 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1045 unsigned XLen = Subtarget->getXLen();
1046 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1047 unsigned TrailingZeros = llvm::countr_zero(Mask);
1048 if (TrailingZeros > 0 && LeadingZeros == 32) {
1049 SDNode *SRLIW = CurDAG->getMachineNode(
1050 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1051 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1052 SDNode *SLLI = CurDAG->getMachineNode(
1053 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1054 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1055 ReplaceNode(Node, SLLI);
1056 return;
1057 }
1058 }
1059 break;
1060 }
1061 case ISD::SRL: {
1062 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1063 if (!N1C)
1064 break;
1065 SDValue N0 = Node->getOperand(0);
1066 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1067 break;
1068 unsigned ShAmt = N1C->getZExtValue();
1069 uint64_t Mask = N0.getConstantOperandVal(1);
1070
1071 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1072 // 32 leading zeros and C3 trailing zeros.
1073 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1074 unsigned XLen = Subtarget->getXLen();
1075 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1076 unsigned TrailingZeros = llvm::countr_zero(Mask);
1077 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1078 SDNode *SRLIW = CurDAG->getMachineNode(
1079 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1080 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1081 SDNode *SLLI = CurDAG->getMachineNode(
1082 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1083 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1084 ReplaceNode(Node, SLLI);
1085 return;
1086 }
1087 }
1088
1089 // Optimize (srl (and X, C2), C) ->
1090 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1091 // Where C2 is a mask with C3 trailing ones.
1092 // Taking into account that the C2 may have had lower bits unset by
1093 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1094 // This pattern occurs when type legalizing right shifts for types with
1095 // less than XLen bits.
1096 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1097 if (!isMask_64(Mask))
1098 break;
1099 unsigned TrailingOnes = llvm::countr_one(Mask);
1100 if (ShAmt >= TrailingOnes)
1101 break;
1102 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1103 if (TrailingOnes == 32) {
1104 SDNode *SRLI = CurDAG->getMachineNode(
1105 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1106 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1107 ReplaceNode(Node, SRLI);
1108 return;
1109 }
1110
1111 // Only do the remaining transforms if the AND has one use.
1112 if (!N0.hasOneUse())
1113 break;
1114
1115 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1116 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1117 SDNode *BEXTI = CurDAG->getMachineNode(
1118 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1119 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1120 ReplaceNode(Node, BEXTI);
1121 return;
1122 }
1123
1124 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1125 SDNode *SLLI =
1126 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1127 CurDAG->getTargetConstant(LShAmt, DL, VT));
1128 SDNode *SRLI = CurDAG->getMachineNode(
1129 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1130 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1131 ReplaceNode(Node, SRLI);
1132 return;
1133 }
1134 case ISD::SRA: {
1135 if (trySignedBitfieldExtract(Node))
1136 return;
1137
1138 // Optimize (sra (sext_inreg X, i16), C) ->
1139 // (srai (slli X, (XLen-16), (XLen-16) + C)
1140 // And (sra (sext_inreg X, i8), C) ->
1141 // (srai (slli X, (XLen-8), (XLen-8) + C)
1142 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1143 // This transform matches the code we get without Zbb. The shifts are more
1144 // compressible, and this can help expose CSE opportunities in the sdiv by
1145 // constant optimization.
1146 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1147 if (!N1C)
1148 break;
1149 SDValue N0 = Node->getOperand(0);
1150 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1151 break;
1152 unsigned ShAmt = N1C->getZExtValue();
1153 unsigned ExtSize =
1154 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1155 // ExtSize of 32 should use sraiw via tablegen pattern.
1156 if (ExtSize >= 32 || ShAmt >= ExtSize)
1157 break;
1158 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1159 SDNode *SLLI =
1160 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1161 CurDAG->getTargetConstant(LShAmt, DL, VT));
1162 SDNode *SRAI = CurDAG->getMachineNode(
1163 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1164 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1165 ReplaceNode(Node, SRAI);
1166 return;
1167 }
1168 case ISD::OR:
1169 case ISD::XOR:
1170 if (tryShrinkShlLogicImm(Node))
1171 return;
1172
1173 break;
1174 case ISD::AND: {
1175 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1176 if (!N1C)
1177 break;
1178 uint64_t C1 = N1C->getZExtValue();
1179 const bool isC1Mask = isMask_64(C1);
1180 const bool isC1ANDI = isInt<12>(C1);
1181
1182 SDValue N0 = Node->getOperand(0);
1183
1184 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1185 SDValue X, unsigned Msb,
1186 unsigned Lsb) {
1187 if (!Subtarget->hasVendorXTHeadBb())
1188 return false;
1189
1190 SDNode *TH_EXTU = CurDAG->getMachineNode(
1191 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1192 CurDAG->getTargetConstant(Lsb, DL, VT));
1193 ReplaceNode(Node, TH_EXTU);
1194 return true;
1195 };
1196
1197 bool LeftShift = N0.getOpcode() == ISD::SHL;
1198 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1199 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1200 if (!C)
1201 break;
1202 unsigned C2 = C->getZExtValue();
1203 unsigned XLen = Subtarget->getXLen();
1204 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1205
1206 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1207 // shift pair might offer more compression opportunities.
1208 // TODO: We could check for C extension here, but we don't have many lit
1209 // tests with the C extension enabled so not checking gets better
1210 // coverage.
1211 // TODO: What if ANDI is faster than shift?
1212 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1213
1214 // Clear irrelevant bits in the mask.
1215 if (LeftShift)
1216 C1 &= maskTrailingZeros<uint64_t>(C2);
1217 else
1218 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1219
1220 // Some transforms should only be done if the shift has a single use or
1221 // the AND would become (srli (slli X, 32), 32)
1222 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1223
1224 SDValue X = N0.getOperand(0);
1225
1226 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1227 // with c3 leading zeros.
1228 if (!LeftShift && isC1Mask) {
1229 unsigned Leading = XLen - llvm::bit_width(C1);
1230 if (C2 < Leading) {
1231 // If the number of leading zeros is C2+32 this can be SRLIW.
1232 if (C2 + 32 == Leading) {
1233 SDNode *SRLIW = CurDAG->getMachineNode(
1234 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1235 ReplaceNode(Node, SRLIW);
1236 return;
1237 }
1238
1239 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1240 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1241 //
1242 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1243 // legalized and goes through DAG combine.
1244 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1245 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1246 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1247 SDNode *SRAIW =
1248 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1249 CurDAG->getTargetConstant(31, DL, VT));
1250 SDNode *SRLIW = CurDAG->getMachineNode(
1251 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1252 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1253 ReplaceNode(Node, SRLIW);
1254 return;
1255 }
1256
1257 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1258 // available.
1259 // Transform (and (srl x, C2), C1)
1260 // -> (<bfextract> x, msb, lsb)
1261 //
1262 // Make sure to keep this below the SRLIW cases, as we always want to
1263 // prefer the more common instruction.
1264 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1265 const unsigned Lsb = C2;
1266 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1267 return;
1268
1269 // (srli (slli x, c3-c2), c3).
1270 // Skip if we could use (zext.w (sraiw X, C2)).
1271 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1272 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1273 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1274 // Also Skip if we can use bexti or th.tst.
1275 Skip |= HasBitTest && Leading == XLen - 1;
1276 if (OneUseOrZExtW && !Skip) {
1277 SDNode *SLLI = CurDAG->getMachineNode(
1278 RISCV::SLLI, DL, VT, X,
1279 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1280 SDNode *SRLI = CurDAG->getMachineNode(
1281 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1282 CurDAG->getTargetConstant(Leading, DL, VT));
1283 ReplaceNode(Node, SRLI);
1284 return;
1285 }
1286 }
1287 }
1288
1289 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1290 // shifted by c2 bits with c3 leading zeros.
1291 if (LeftShift && isShiftedMask_64(C1)) {
1292 unsigned Leading = XLen - llvm::bit_width(C1);
1293
1294 if (C2 + Leading < XLen &&
1295 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1296 // Use slli.uw when possible.
1297 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1298 SDNode *SLLI_UW =
1299 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1300 CurDAG->getTargetConstant(C2, DL, VT));
1301 ReplaceNode(Node, SLLI_UW);
1302 return;
1303 }
1304
1305 // (srli (slli c2+c3), c3)
1306 if (OneUseOrZExtW && !IsCANDI) {
1307 SDNode *SLLI = CurDAG->getMachineNode(
1308 RISCV::SLLI, DL, VT, X,
1309 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1310 SDNode *SRLI = CurDAG->getMachineNode(
1311 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1312 CurDAG->getTargetConstant(Leading, DL, VT));
1313 ReplaceNode(Node, SRLI);
1314 return;
1315 }
1316 }
1317 }
1318
1319 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1320 // shifted mask with c2 leading zeros and c3 trailing zeros.
1321 if (!LeftShift && isShiftedMask_64(C1)) {
1322 unsigned Leading = XLen - llvm::bit_width(C1);
1323 unsigned Trailing = llvm::countr_zero(C1);
1324 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1325 !IsCANDI) {
1326 unsigned SrliOpc = RISCV::SRLI;
1327 // If the input is zexti32 we should use SRLIW.
1328 if (X.getOpcode() == ISD::AND &&
1329 isa<ConstantSDNode>(X.getOperand(1)) &&
1330 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1331 SrliOpc = RISCV::SRLIW;
1332 X = X.getOperand(0);
1333 }
1334 SDNode *SRLI = CurDAG->getMachineNode(
1335 SrliOpc, DL, VT, X,
1336 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1337 SDNode *SLLI = CurDAG->getMachineNode(
1338 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1339 CurDAG->getTargetConstant(Trailing, DL, VT));
1340 ReplaceNode(Node, SLLI);
1341 return;
1342 }
1343 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1344 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1345 OneUseOrZExtW && !IsCANDI) {
1346 SDNode *SRLIW = CurDAG->getMachineNode(
1347 RISCV::SRLIW, DL, VT, X,
1348 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1349 SDNode *SLLI = CurDAG->getMachineNode(
1350 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1351 CurDAG->getTargetConstant(Trailing, DL, VT));
1352 ReplaceNode(Node, SLLI);
1353 return;
1354 }
1355 }
1356
1357 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1358 // shifted mask with no leading zeros and c3 trailing zeros.
1359 if (LeftShift && isShiftedMask_64(C1)) {
1360 unsigned Leading = XLen - llvm::bit_width(C1);
1361 unsigned Trailing = llvm::countr_zero(C1);
1362 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1363 SDNode *SRLI = CurDAG->getMachineNode(
1364 RISCV::SRLI, DL, VT, X,
1365 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1366 SDNode *SLLI = CurDAG->getMachineNode(
1367 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1368 CurDAG->getTargetConstant(Trailing, DL, VT));
1369 ReplaceNode(Node, SLLI);
1370 return;
1371 }
1372 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1373 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1374 SDNode *SRLIW = CurDAG->getMachineNode(
1375 RISCV::SRLIW, DL, VT, X,
1376 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1377 SDNode *SLLI = CurDAG->getMachineNode(
1378 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1379 CurDAG->getTargetConstant(Trailing, DL, VT));
1380 ReplaceNode(Node, SLLI);
1381 return;
1382 }
1383 }
1384 }
1385
1386 // If C1 masks off the upper bits only (but can't be formed as an
1387 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1388 // available.
1389 // Transform (and x, C1)
1390 // -> (<bfextract> x, msb, lsb)
1391 if (isC1Mask && !isC1ANDI) {
1392 const unsigned Msb = llvm::bit_width(C1) - 1;
1393 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1394 return;
1395 }
1396
1397 if (tryShrinkShlLogicImm(Node))
1398 return;
1399
1400 break;
1401 }
1402 case ISD::MUL: {
1403 // Special case for calculating (mul (and X, C2), C1) where the full product
1404 // fits in XLen bits. We can shift X left by the number of leading zeros in
1405 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1406 // product has XLen trailing zeros, putting it in the output of MULHU. This
1407 // can avoid materializing a constant in a register for C2.
1408
1409 // RHS should be a constant.
1410 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1411 if (!N1C || !N1C->hasOneUse())
1412 break;
1413
1414 // LHS should be an AND with constant.
1415 SDValue N0 = Node->getOperand(0);
1416 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1417 break;
1418
1419 uint64_t C2 = N0.getConstantOperandVal(1);
1420
1421 // Constant should be a mask.
1422 if (!isMask_64(C2))
1423 break;
1424
1425 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1426 // multiple users or the constant is a simm12. This prevents inserting a
1427 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1428 // make it more costly to materialize. Otherwise, using a SLLI might allow
1429 // it to be compressed.
1430 bool IsANDIOrZExt =
1431 isInt<12>(C2) ||
1432 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1433 // With XTHeadBb, we can use TH.EXTU.
1434 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1435 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1436 break;
1437 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1438 // the constant is a simm32.
1439 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1440 // With XTHeadBb, we can use TH.EXTU.
1441 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1442 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1443 break;
1444
1445 // We need to shift left the AND input and C1 by a total of XLen bits.
1446
1447 // How far left do we need to shift the AND input?
1448 unsigned XLen = Subtarget->getXLen();
1449 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1450
1451 // The constant gets shifted by the remaining amount unless that would
1452 // shift bits out.
1453 uint64_t C1 = N1C->getZExtValue();
1454 unsigned ConstantShift = XLen - LeadingZeros;
1455 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1456 break;
1457
1458 uint64_t ShiftedC1 = C1 << ConstantShift;
1459 // If this is RV32, we need to sign extend the constant.
1460 if (XLen == 32)
1461 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1462
1463 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1464 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1465 SDNode *SLLI =
1466 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1467 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1468 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1469 SDValue(SLLI, 0), SDValue(Imm, 0));
1470 ReplaceNode(Node, MULHU);
1471 return;
1472 }
1473 case ISD::LOAD: {
1474 if (tryIndexedLoad(Node))
1475 return;
1476 break;
1477 }
1478 case ISD::INTRINSIC_WO_CHAIN: {
1479 unsigned IntNo = Node->getConstantOperandVal(0);
1480 switch (IntNo) {
1481 // By default we do not custom select any intrinsic.
1482 default:
1483 break;
1484 case Intrinsic::riscv_vmsgeu:
1485 case Intrinsic::riscv_vmsge: {
1486 SDValue Src1 = Node->getOperand(1);
1487 SDValue Src2 = Node->getOperand(2);
1488 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1489 bool IsCmpUnsignedZero = false;
1490 // Only custom select scalar second operand.
1491 if (Src2.getValueType() != XLenVT)
1492 break;
1493 // Small constants are handled with patterns.
1494 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1495 int64_t CVal = C->getSExtValue();
1496 if (CVal >= -15 && CVal <= 16) {
1497 if (!IsUnsigned || CVal != 0)
1498 break;
1499 IsCmpUnsignedZero = true;
1500 }
1501 }
1502 MVT Src1VT = Src1.getSimpleValueType();
1503 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1504 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1505 default:
1506 llvm_unreachable("Unexpected LMUL!");
1507#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1508 case RISCVII::VLMUL::lmulenum: \
1509 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1510 : RISCV::PseudoVMSLT_VX_##suffix; \
1511 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1512 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1513 break;
1514 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1515 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1516 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1517 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1518 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1519 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1520 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1521#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1522 }
1523 SDValue SEW = CurDAG->getTargetConstant(
1524 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1525 SDValue VL;
1526 selectVLOp(Node->getOperand(3), VL);
1527
1528 // If vmsgeu with 0 immediate, expand it to vmset.
1529 if (IsCmpUnsignedZero) {
1530 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1531 return;
1532 }
1533
1534 // Expand to
1535 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1536 SDValue Cmp = SDValue(
1537 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1538 0);
1539 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1540 {Cmp, Cmp, VL, SEW}));
1541 return;
1542 }
1543 case Intrinsic::riscv_vmsgeu_mask:
1544 case Intrinsic::riscv_vmsge_mask: {
1545 SDValue Src1 = Node->getOperand(2);
1546 SDValue Src2 = Node->getOperand(3);
1547 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1548 bool IsCmpUnsignedZero = false;
1549 // Only custom select scalar second operand.
1550 if (Src2.getValueType() != XLenVT)
1551 break;
1552 // Small constants are handled with patterns.
1553 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1554 int64_t CVal = C->getSExtValue();
1555 if (CVal >= -15 && CVal <= 16) {
1556 if (!IsUnsigned || CVal != 0)
1557 break;
1558 IsCmpUnsignedZero = true;
1559 }
1560 }
1561 MVT Src1VT = Src1.getSimpleValueType();
1562 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1563 VMOROpcode;
1564 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1565 default:
1566 llvm_unreachable("Unexpected LMUL!");
1567#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1568 case RISCVII::VLMUL::lmulenum: \
1569 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1570 : RISCV::PseudoVMSLT_VX_##suffix; \
1571 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1572 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1573 break;
1574 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1575 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1576 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1577 CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1578 CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1579 CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1580 CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1581#undef CASE_VMSLT_OPCODES
1582 }
1583 // Mask operations use the LMUL from the mask type.
1584 switch (RISCVTargetLowering::getLMUL(VT)) {
1585 default:
1586 llvm_unreachable("Unexpected LMUL!");
1587#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1588 case RISCVII::VLMUL::lmulenum: \
1589 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1590 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1591 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1592 break;
1593 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1594 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1595 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1596 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1597 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1598 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1599 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1600#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1601 }
1602 SDValue SEW = CurDAG->getTargetConstant(
1603 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1604 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1605 SDValue VL;
1606 selectVLOp(Node->getOperand(5), VL);
1607 SDValue MaskedOff = Node->getOperand(1);
1608 SDValue Mask = Node->getOperand(4);
1609
1610 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1611 if (IsCmpUnsignedZero) {
1612 // We don't need vmor if the MaskedOff and the Mask are the same
1613 // value.
1614 if (Mask == MaskedOff) {
1615 ReplaceUses(Node, Mask.getNode());
1616 return;
1617 }
1618 ReplaceNode(Node,
1619 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1620 {Mask, MaskedOff, VL, MaskSEW}));
1621 return;
1622 }
1623
1624 // If the MaskedOff value and the Mask are the same value use
1625 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1626 // This avoids needing to copy v0 to vd before starting the next sequence.
1627 if (Mask == MaskedOff) {
1628 SDValue Cmp = SDValue(
1629 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1630 0);
1631 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1632 {Mask, Cmp, VL, MaskSEW}));
1633 return;
1634 }
1635
1636 // Mask needs to be copied to V0.
1637 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1638 RISCV::V0, Mask, SDValue());
1639 SDValue Glue = Chain.getValue(1);
1640 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1641
1642 // Otherwise use
1643 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1644 // The result is mask undisturbed.
1645 // We use the same instructions to emulate mask agnostic behavior, because
1646 // the agnostic result can be either undisturbed or all 1.
1647 SDValue Cmp = SDValue(
1648 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1649 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1650 0);
1651 // vmxor.mm vd, vd, v0 is used to update active value.
1652 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1653 {Cmp, Mask, VL, MaskSEW}));
1654 return;
1655 }
1656 case Intrinsic::riscv_vsetvli:
1657 case Intrinsic::riscv_vsetvlimax:
1658 return selectVSETVLI(Node);
1659 }
1660 break;
1661 }
1662 case ISD::INTRINSIC_W_CHAIN: {
1663 unsigned IntNo = Node->getConstantOperandVal(1);
1664 switch (IntNo) {
1665 // By default we do not custom select any intrinsic.
1666 default:
1667 break;
1668 case Intrinsic::riscv_vlseg2:
1669 case Intrinsic::riscv_vlseg3:
1670 case Intrinsic::riscv_vlseg4:
1671 case Intrinsic::riscv_vlseg5:
1672 case Intrinsic::riscv_vlseg6:
1673 case Intrinsic::riscv_vlseg7:
1674 case Intrinsic::riscv_vlseg8: {
1675 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1676 return;
1677 }
1678 case Intrinsic::riscv_vlseg2_mask:
1679 case Intrinsic::riscv_vlseg3_mask:
1680 case Intrinsic::riscv_vlseg4_mask:
1681 case Intrinsic::riscv_vlseg5_mask:
1682 case Intrinsic::riscv_vlseg6_mask:
1683 case Intrinsic::riscv_vlseg7_mask:
1684 case Intrinsic::riscv_vlseg8_mask: {
1685 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1686 return;
1687 }
1688 case Intrinsic::riscv_vlsseg2:
1689 case Intrinsic::riscv_vlsseg3:
1690 case Intrinsic::riscv_vlsseg4:
1691 case Intrinsic::riscv_vlsseg5:
1692 case Intrinsic::riscv_vlsseg6:
1693 case Intrinsic::riscv_vlsseg7:
1694 case Intrinsic::riscv_vlsseg8: {
1695 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1696 return;
1697 }
1698 case Intrinsic::riscv_vlsseg2_mask:
1699 case Intrinsic::riscv_vlsseg3_mask:
1700 case Intrinsic::riscv_vlsseg4_mask:
1701 case Intrinsic::riscv_vlsseg5_mask:
1702 case Intrinsic::riscv_vlsseg6_mask:
1703 case Intrinsic::riscv_vlsseg7_mask:
1704 case Intrinsic::riscv_vlsseg8_mask: {
1705 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1706 return;
1707 }
1708 case Intrinsic::riscv_vloxseg2:
1709 case Intrinsic::riscv_vloxseg3:
1710 case Intrinsic::riscv_vloxseg4:
1711 case Intrinsic::riscv_vloxseg5:
1712 case Intrinsic::riscv_vloxseg6:
1713 case Intrinsic::riscv_vloxseg7:
1714 case Intrinsic::riscv_vloxseg8:
1715 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1716 return;
1717 case Intrinsic::riscv_vluxseg2:
1718 case Intrinsic::riscv_vluxseg3:
1719 case Intrinsic::riscv_vluxseg4:
1720 case Intrinsic::riscv_vluxseg5:
1721 case Intrinsic::riscv_vluxseg6:
1722 case Intrinsic::riscv_vluxseg7:
1723 case Intrinsic::riscv_vluxseg8:
1724 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1725 return;
1726 case Intrinsic::riscv_vloxseg2_mask:
1727 case Intrinsic::riscv_vloxseg3_mask:
1728 case Intrinsic::riscv_vloxseg4_mask:
1729 case Intrinsic::riscv_vloxseg5_mask:
1730 case Intrinsic::riscv_vloxseg6_mask:
1731 case Intrinsic::riscv_vloxseg7_mask:
1732 case Intrinsic::riscv_vloxseg8_mask:
1733 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1734 return;
1735 case Intrinsic::riscv_vluxseg2_mask:
1736 case Intrinsic::riscv_vluxseg3_mask:
1737 case Intrinsic::riscv_vluxseg4_mask:
1738 case Intrinsic::riscv_vluxseg5_mask:
1739 case Intrinsic::riscv_vluxseg6_mask:
1740 case Intrinsic::riscv_vluxseg7_mask:
1741 case Intrinsic::riscv_vluxseg8_mask:
1742 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1743 return;
1744 case Intrinsic::riscv_vlseg8ff:
1745 case Intrinsic::riscv_vlseg7ff:
1746 case Intrinsic::riscv_vlseg6ff:
1747 case Intrinsic::riscv_vlseg5ff:
1748 case Intrinsic::riscv_vlseg4ff:
1749 case Intrinsic::riscv_vlseg3ff:
1750 case Intrinsic::riscv_vlseg2ff: {
1751 selectVLSEGFF(Node, /*IsMasked*/ false);
1752 return;
1753 }
1754 case Intrinsic::riscv_vlseg8ff_mask:
1755 case Intrinsic::riscv_vlseg7ff_mask:
1756 case Intrinsic::riscv_vlseg6ff_mask:
1757 case Intrinsic::riscv_vlseg5ff_mask:
1758 case Intrinsic::riscv_vlseg4ff_mask:
1759 case Intrinsic::riscv_vlseg3ff_mask:
1760 case Intrinsic::riscv_vlseg2ff_mask: {
1761 selectVLSEGFF(Node, /*IsMasked*/ true);
1762 return;
1763 }
1764 case Intrinsic::riscv_vloxei:
1765 case Intrinsic::riscv_vloxei_mask:
1766 case Intrinsic::riscv_vluxei:
1767 case Intrinsic::riscv_vluxei_mask: {
1768 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1769 IntNo == Intrinsic::riscv_vluxei_mask;
1770 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1771 IntNo == Intrinsic::riscv_vloxei_mask;
1772
1773 MVT VT = Node->getSimpleValueType(0);
1774 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1775
1776 unsigned CurOp = 2;
1777    SmallVector<SDValue, 8> Operands;
1778    Operands.push_back(Node->getOperand(CurOp++));
1779
1780 MVT IndexVT;
1781 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1782 /*IsStridedOrIndexed*/ true, Operands,
1783 /*IsLoad=*/true, &IndexVT);
1784
1785    assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1786           "Element count mismatch");
1787
1788    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1789    RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1790 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1791 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1792 report_fatal_error("The V extension does not support EEW=64 for index "
1793 "values when XLEN=32");
1794 }
1795 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1796 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1797 static_cast<unsigned>(IndexLMUL));
1798 MachineSDNode *Load =
1799 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1800
1801 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1802 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1803
1804 ReplaceNode(Node, Load);
1805 return;
1806 }
1807 case Intrinsic::riscv_vlm:
1808 case Intrinsic::riscv_vle:
1809 case Intrinsic::riscv_vle_mask:
1810 case Intrinsic::riscv_vlse:
1811 case Intrinsic::riscv_vlse_mask: {
1812 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1813 IntNo == Intrinsic::riscv_vlse_mask;
1814 bool IsStrided =
1815 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1816
1817 MVT VT = Node->getSimpleValueType(0);
1818 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1819
1820    // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1821    // operand at the IR level. In pseudos, it has both a policy and a
1822    // passthru operand. The passthru operand is needed to track the
1823    // "tail undefined" state, and the policy is there just for
1824    // consistency - it will always be "don't care" for the
1825    // unmasked form.
1826 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1827 unsigned CurOp = 2;
1828    SmallVector<SDValue, 8> Operands;
1829    if (HasPassthruOperand)
1830 Operands.push_back(Node->getOperand(CurOp++));
1831 else {
1832 // We eagerly lower to implicit_def (instead of undef), as we
1833 // otherwise fail to select nodes such as: nxv1i1 = undef
1834 SDNode *Passthru =
1835 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1836 Operands.push_back(SDValue(Passthru, 0));
1837 }
1838 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1839 Operands, /*IsLoad=*/true);
1840
1841    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1842    const RISCV::VLEPseudo *P =
1843 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
1844 static_cast<unsigned>(LMUL));
1845 MachineSDNode *Load =
1846 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1847
1848 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1849 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1850
1851 ReplaceNode(Node, Load);
1852 return;
1853 }
1854 case Intrinsic::riscv_vleff:
1855 case Intrinsic::riscv_vleff_mask: {
1856 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1857
1858 MVT VT = Node->getSimpleValueType(0);
1859 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1860
1861 unsigned CurOp = 2;
1862    SmallVector<SDValue, 8> Operands;
1863    Operands.push_back(Node->getOperand(CurOp++));
1864 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1865 /*IsStridedOrIndexed*/ false, Operands,
1866 /*IsLoad=*/true);
1867
1868    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1869    const RISCV::VLEPseudo *P =
1870 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
1871 Log2SEW, static_cast<unsigned>(LMUL));
1872    MachineSDNode *Load = CurDAG->getMachineNode(
1873        P->Pseudo, DL, Node->getVTList(), Operands);
1874 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1875 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1876
1877 ReplaceNode(Node, Load);
1878 return;
1879 }
1880 }
1881 break;
1882 }
1883 case ISD::INTRINSIC_VOID: {
1884 unsigned IntNo = Node->getConstantOperandVal(1);
1885 switch (IntNo) {
1886 case Intrinsic::riscv_vsseg2:
1887 case Intrinsic::riscv_vsseg3:
1888 case Intrinsic::riscv_vsseg4:
1889 case Intrinsic::riscv_vsseg5:
1890 case Intrinsic::riscv_vsseg6:
1891 case Intrinsic::riscv_vsseg7:
1892 case Intrinsic::riscv_vsseg8: {
1893 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1894 return;
1895 }
1896 case Intrinsic::riscv_vsseg2_mask:
1897 case Intrinsic::riscv_vsseg3_mask:
1898 case Intrinsic::riscv_vsseg4_mask:
1899 case Intrinsic::riscv_vsseg5_mask:
1900 case Intrinsic::riscv_vsseg6_mask:
1901 case Intrinsic::riscv_vsseg7_mask:
1902 case Intrinsic::riscv_vsseg8_mask: {
1903 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1904 return;
1905 }
1906 case Intrinsic::riscv_vssseg2:
1907 case Intrinsic::riscv_vssseg3:
1908 case Intrinsic::riscv_vssseg4:
1909 case Intrinsic::riscv_vssseg5:
1910 case Intrinsic::riscv_vssseg6:
1911 case Intrinsic::riscv_vssseg7:
1912 case Intrinsic::riscv_vssseg8: {
1913 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1914 return;
1915 }
1916 case Intrinsic::riscv_vssseg2_mask:
1917 case Intrinsic::riscv_vssseg3_mask:
1918 case Intrinsic::riscv_vssseg4_mask:
1919 case Intrinsic::riscv_vssseg5_mask:
1920 case Intrinsic::riscv_vssseg6_mask:
1921 case Intrinsic::riscv_vssseg7_mask:
1922 case Intrinsic::riscv_vssseg8_mask: {
1923 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1924 return;
1925 }
1926 case Intrinsic::riscv_vsoxseg2:
1927 case Intrinsic::riscv_vsoxseg3:
1928 case Intrinsic::riscv_vsoxseg4:
1929 case Intrinsic::riscv_vsoxseg5:
1930 case Intrinsic::riscv_vsoxseg6:
1931 case Intrinsic::riscv_vsoxseg7:
1932 case Intrinsic::riscv_vsoxseg8:
1933 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1934 return;
1935 case Intrinsic::riscv_vsuxseg2:
1936 case Intrinsic::riscv_vsuxseg3:
1937 case Intrinsic::riscv_vsuxseg4:
1938 case Intrinsic::riscv_vsuxseg5:
1939 case Intrinsic::riscv_vsuxseg6:
1940 case Intrinsic::riscv_vsuxseg7:
1941 case Intrinsic::riscv_vsuxseg8:
1942 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1943 return;
1944 case Intrinsic::riscv_vsoxseg2_mask:
1945 case Intrinsic::riscv_vsoxseg3_mask:
1946 case Intrinsic::riscv_vsoxseg4_mask:
1947 case Intrinsic::riscv_vsoxseg5_mask:
1948 case Intrinsic::riscv_vsoxseg6_mask:
1949 case Intrinsic::riscv_vsoxseg7_mask:
1950 case Intrinsic::riscv_vsoxseg8_mask:
1951 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1952 return;
1953 case Intrinsic::riscv_vsuxseg2_mask:
1954 case Intrinsic::riscv_vsuxseg3_mask:
1955 case Intrinsic::riscv_vsuxseg4_mask:
1956 case Intrinsic::riscv_vsuxseg5_mask:
1957 case Intrinsic::riscv_vsuxseg6_mask:
1958 case Intrinsic::riscv_vsuxseg7_mask:
1959 case Intrinsic::riscv_vsuxseg8_mask:
1960 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1961 return;
1962 case Intrinsic::riscv_vsoxei:
1963 case Intrinsic::riscv_vsoxei_mask:
1964 case Intrinsic::riscv_vsuxei:
1965 case Intrinsic::riscv_vsuxei_mask: {
1966 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
1967 IntNo == Intrinsic::riscv_vsuxei_mask;
1968 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
1969 IntNo == Intrinsic::riscv_vsoxei_mask;
1970
1971 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1972 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1973
1974 unsigned CurOp = 2;
1975    SmallVector<SDValue, 8> Operands;
1976    Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1977
1978 MVT IndexVT;
1979 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1980 /*IsStridedOrIndexed*/ true, Operands,
1981 /*IsLoad=*/false, &IndexVT);
1982
1983    assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1984           "Element count mismatch");
1985
1986    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1987    RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1988 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1989 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1990 report_fatal_error("The V extension does not support EEW=64 for index "
1991 "values when XLEN=32");
1992 }
1993 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
1994 IsMasked, IsOrdered, IndexLog2EEW,
1995 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
1996 MachineSDNode *Store =
1997 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1998
1999 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2000 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2001
2002 ReplaceNode(Node, Store);
2003 return;
2004 }
2005 case Intrinsic::riscv_vsm:
2006 case Intrinsic::riscv_vse:
2007 case Intrinsic::riscv_vse_mask:
2008 case Intrinsic::riscv_vsse:
2009 case Intrinsic::riscv_vsse_mask: {
2010 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2011 IntNo == Intrinsic::riscv_vsse_mask;
2012 bool IsStrided =
2013 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2014
2015 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2016 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2017
2018 unsigned CurOp = 2;
2019    SmallVector<SDValue, 8> Operands;
2020    Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2021
2022 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2023 Operands);
2024
2025    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2026    const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2027 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2028 MachineSDNode *Store =
2029 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2030 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2031 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2032
2033 ReplaceNode(Node, Store);
2034 return;
2035 }
2036 case Intrinsic::riscv_sf_vc_x_se:
2037 case Intrinsic::riscv_sf_vc_i_se:
2038 selectSF_VC_X_SE(Node);
2039 return;
2040 }
2041 break;
2042 }
2043 case ISD::BITCAST: {
2044 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2045 // Just drop bitcasts between vectors if both are fixed or both are
2046 // scalable.
2047 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2048 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2049 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2050 CurDAG->RemoveDeadNode(Node);
2051 return;
2052 }
2053 break;
2054 }
2055 case ISD::INSERT_SUBVECTOR: {
2056 SDValue V = Node->getOperand(0);
2057 SDValue SubV = Node->getOperand(1);
2058 SDLoc DL(SubV);
2059 auto Idx = Node->getConstantOperandVal(2);
2060 MVT SubVecVT = SubV.getSimpleValueType();
2061
2062 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2063 MVT SubVecContainerVT = SubVecVT;
2064 // Establish the correct scalable-vector types for any fixed-length type.
2065 if (SubVecVT.isFixedLengthVector()) {
2066 assert(Idx == 0 && V.isUndef());
2067 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2068 }
2069 if (VT.isFixedLengthVector())
2070 VT = TLI.getContainerForFixedLengthVector(VT);
2071
2072 const auto *TRI = Subtarget->getRegisterInfo();
2073 unsigned SubRegIdx;
2074 std::tie(SubRegIdx, Idx) =
2075        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2076            VT, SubVecContainerVT, Idx, TRI);
2077
2078 // If the Idx hasn't been completely eliminated then this is a subvector
2079 // insert which doesn't naturally align to a vector register. These must
2080 // be handled using instructions to manipulate the vector registers.
2081 if (Idx != 0)
2082 break;
2083
2084 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2085 [[maybe_unused]] bool IsSubVecPartReg =
2086 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2087 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2088 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2089 assert((!IsSubVecPartReg || V.isUndef()) &&
2090 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2091 "the subvector is smaller than a full-sized register");
2092
2093 // If we haven't set a SubRegIdx, then we must be going between
2094 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2095 if (SubRegIdx == RISCV::NoSubRegister) {
2096 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT);
2097      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2098                 InRegClassID &&
2099 "Unexpected subvector extraction");
2100 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2101 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2102 DL, VT, SubV, RC);
2103 ReplaceNode(Node, NewNode);
2104 return;
2105 }
2106
2107 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2108 ReplaceNode(Node, Insert.getNode());
2109 return;
2110 }
2111  case ISD::EXTRACT_SUBVECTOR: {
2112    SDValue V = Node->getOperand(0);
2113 auto Idx = Node->getConstantOperandVal(1);
2114 MVT InVT = V.getSimpleValueType();
2115 SDLoc DL(V);
2116
2117 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2118 MVT SubVecContainerVT = VT;
2119 // Establish the correct scalable-vector types for any fixed-length type.
2120 if (VT.isFixedLengthVector()) {
2121 assert(Idx == 0);
2122 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2123 }
2124 if (InVT.isFixedLengthVector())
2125 InVT = TLI.getContainerForFixedLengthVector(InVT);
2126
2127 const auto *TRI = Subtarget->getRegisterInfo();
2128 unsigned SubRegIdx;
2129 std::tie(SubRegIdx, Idx) =
2130        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2131            InVT, SubVecContainerVT, Idx, TRI);
2132
2133 // If the Idx hasn't been completely eliminated then this is a subvector
2134 // extract which doesn't naturally align to a vector register. These must
2135 // be handled using instructions to manipulate the vector registers.
2136 if (Idx != 0)
2137 break;
2138
2139 // If we haven't set a SubRegIdx, then we must be going between
2140 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2141 if (SubRegIdx == RISCV::NoSubRegister) {
2142 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2143      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2144                 InRegClassID &&
2145 "Unexpected subvector extraction");
2146 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2147 SDNode *NewNode =
2148 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2149 ReplaceNode(Node, NewNode);
2150 return;
2151 }
2152
2153 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2154 ReplaceNode(Node, Extract.getNode());
2155 return;
2156 }
2157  case RISCVISD::VMV_S_X_VL:
2158  case RISCVISD::VFMV_S_F_VL:
2159  case RISCVISD::VMV_V_X_VL:
2160  case RISCVISD::VFMV_V_F_VL: {
2161 // Try to match splat of a scalar load to a strided load with stride of x0.
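    // Illustrative sketch of the intended transform: a splat of a loaded
    // scalar such as (VMV_V_X_VL undef, (load a0), vl) can instead be
    // selected as a zero-strided load, e.g. "vlse32.v v8, (a0), zero" for
    // SEW=32, so the scalar is loaded straight into every element.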
2162 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2163 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2164 if (!Node->getOperand(0).isUndef())
2165 break;
2166 SDValue Src = Node->getOperand(1);
2167 auto *Ld = dyn_cast<LoadSDNode>(Src);
2168    // We can't fold an indexed (pre/post-increment) load: its second
2169    // output (the updated address) has uses, so the load can't be removed.
2170 if (!Ld || Ld->isIndexed())
2171 break;
2172 EVT MemVT = Ld->getMemoryVT();
2173 // The memory VT should be the same size as the element type.
2174 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2175 break;
2176 if (!IsProfitableToFold(Src, Node, Node) ||
2177 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2178 break;
2179
2180 SDValue VL;
2181 if (IsScalarMove) {
2182 // We could deal with more VL if we update the VSETVLI insert pass to
2183 // avoid introducing more VSETVLI.
2184 if (!isOneConstant(Node->getOperand(2)))
2185 break;
2186 selectVLOp(Node->getOperand(2), VL);
2187 } else
2188 selectVLOp(Node->getOperand(2), VL);
2189
2190 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2191 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2192
2193 // If VL=1, then we don't need to do a strided load and can just do a
2194 // regular load.
2195 bool IsStrided = !isOneConstant(VL);
2196
2197    // Only do a strided load if the target has an optimized zero-stride vector load.
2198 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2199 break;
2200
2201    SmallVector<SDValue> Operands = {
2202        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2203 Ld->getBasePtr()};
2204 if (IsStrided)
2205 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2206    uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
2207    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2208 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2209
2210    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2211    const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2212 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2213 Log2SEW, static_cast<unsigned>(LMUL));
2214 MachineSDNode *Load =
2215 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2216 // Update the chain.
2217 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2218 // Record the mem-refs
2219 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2220 // Replace the splat with the vlse.
2221 ReplaceNode(Node, Load);
2222 return;
2223 }
2224 case ISD::PREFETCH:
2225 unsigned Locality = Node->getConstantOperandVal(3);
2226 if (Locality > 2)
2227 break;
2228
2229 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2230 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2231      MMO->setFlags(MachineMemOperand::MONonTemporal);
2232
2233 int NontemporalLevel = 0;
2234 switch (Locality) {
2235 case 0:
2236 NontemporalLevel = 3; // NTL.ALL
2237 break;
2238 case 1:
2239 NontemporalLevel = 1; // NTL.PALL
2240 break;
2241 case 2:
2242 NontemporalLevel = 0; // NTL.P1
2243 break;
2244 default:
2245 llvm_unreachable("unexpected locality value.");
2246 }
2247
2248      if (NontemporalLevel & 0b1)
2249        MMO->setFlags(MONontemporalBit0);
2250      if (NontemporalLevel & 0b10)
2251        MMO->setFlags(MONontemporalBit1);
2252    }
2253 break;
2254 }
2255
2256 // Select the default instruction.
2257 SelectCode(Node);
2258}
2259
2260bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2261    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2262 std::vector<SDValue> &OutOps) {
2263 // Always produce a register and immediate operand, as expected by
2264 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2265 switch (ConstraintID) {
2266  case InlineAsm::ConstraintCode::o:
2267  case InlineAsm::ConstraintCode::m: {
2268    SDValue Op0, Op1;
2269 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2270 assert(Found && "SelectAddrRegImm should always succeed");
2271 OutOps.push_back(Op0);
2272 OutOps.push_back(Op1);
2273 return false;
2274 }
2275  case InlineAsm::ConstraintCode::A:
2276    OutOps.push_back(Op);
2277 OutOps.push_back(
2278 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2279 return false;
2280 default:
2281 report_fatal_error("Unexpected asm memory constraint " +
2282 InlineAsm::getMemConstraintName(ConstraintID));
2283 }
2284
2285 return true;
2286}
2287
2288bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2289                                             SDValue &Offset) {
2290 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2291 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2292 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2293 return true;
2294 }
2295
2296 return false;
2297}
2298
2299// Select a frame index and an optional immediate offset from an ADD or OR.
2300bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2301                                              SDValue &Offset) {
2302  if (SelectAddrFrameIndex(Addr, Base, Offset))
2303    return true;
2304
2305  if (!CurDAG->isBaseWithConstantOffset(Addr))
2306    return false;
2307
2308 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2309 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2310 if (isInt<12>(CVal)) {
2311 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2312 Subtarget->getXLenVT());
2313      Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2314                                         Subtarget->getXLenVT());
2315 return true;
2316 }
2317 }
2318
2319 return false;
2320}
2321
2322// Fold constant addresses.
2323static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2324 const MVT VT, const RISCVSubtarget *Subtarget,
2325                               SDValue Addr, SDValue &Base, SDValue &Offset,
2326                               bool IsPrefetch = false) {
2327 if (!isa<ConstantSDNode>(Addr))
2328 return false;
2329
2330 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2331
2332 // If the constant is a simm12, we can fold the whole constant and use X0 as
2333 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2334 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
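  // Worked example (illustrative): for the constant 0x12345800, Lo12 is
  // sign-extended to -0x800 and Hi becomes 0x12346000, so the address is
  // formed as "lui base, 0x12346" with the -0x800 offset folded into the
  // memory access.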
2335 int64_t Lo12 = SignExtend64<12>(CVal);
2336 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2337 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2338 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2339 return false;
2340
2341 if (Hi) {
2342 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2343 Base = SDValue(
2344 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2345 CurDAG->getTargetConstant(Hi20, DL, VT)),
2346 0);
2347 } else {
2348 Base = CurDAG->getRegister(RISCV::X0, VT);
2349 }
2350 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2351 return true;
2352 }
2353
2354 // Ask how constant materialization would handle this constant.
2355 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2356
2357 // If the last instruction would be an ADDI, we can fold its immediate and
2358 // emit the rest of the sequence as the base.
2359 if (Seq.back().getOpcode() != RISCV::ADDI)
2360 return false;
2361 Lo12 = Seq.back().getImm();
2362 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2363 return false;
2364
2365 // Drop the last instruction.
2366 Seq.pop_back();
2367 assert(!Seq.empty() && "Expected more instructions in sequence");
2368
2369 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2370 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2371 return true;
2372}
2373
2374// Is this ADD instruction only used as the base pointer of scalar loads and
2375// stores?
2376static bool isWorthFoldingAdd(SDValue Add) {
2377  for (auto *Use : Add->uses()) {
2378 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2379 Use->getOpcode() != ISD::ATOMIC_LOAD &&
2380 Use->getOpcode() != ISD::ATOMIC_STORE)
2381 return false;
2382 EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2383 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2384 VT != MVT::f64)
2385 return false;
2386 // Don't allow stores of the value. It must be used as the address.
2387 if (Use->getOpcode() == ISD::STORE &&
2388 cast<StoreSDNode>(Use)->getValue() == Add)
2389 return false;
2390 if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2391 cast<AtomicSDNode>(Use)->getVal() == Add)
2392 return false;
2393 }
2394
2395 return true;
2396}
2397
2398bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2399                                              unsigned MaxShiftAmount,
2400                                              SDValue &Base, SDValue &Index,
2401                                              SDValue &Scale) {
2402 EVT VT = Addr.getSimpleValueType();
2403 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2404 SDValue &Shift) {
2405 uint64_t ShiftAmt = 0;
2406 Index = N;
2407
2408 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2409 // Only match shifts by a value in range [0, MaxShiftAmount].
2410 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2411 Index = N.getOperand(0);
2412 ShiftAmt = N.getConstantOperandVal(1);
2413 }
2414 }
2415
2416 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2417 return ShiftAmt != 0;
2418 };
2419
2420 if (Addr.getOpcode() == ISD::ADD) {
2421 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2422 SDValue AddrB = Addr.getOperand(0);
2423 if (AddrB.getOpcode() == ISD::ADD &&
2424 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2425 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2426 isInt<12>(C1->getSExtValue())) {
2427 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2428 SDValue C1Val =
2429 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2430 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2431 AddrB.getOperand(1), C1Val),
2432 0);
2433 return true;
2434 }
2435 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2436 Base = Addr.getOperand(1);
2437 return true;
2438 } else {
2439 UnwrapShl(Addr.getOperand(1), Index, Scale);
2440 Base = Addr.getOperand(0);
2441 return true;
2442 }
2443 } else if (UnwrapShl(Addr, Index, Scale)) {
2444 EVT VT = Addr.getValueType();
2445 Base = CurDAG->getRegister(RISCV::X0, VT);
2446 return true;
2447 }
2448
2449 return false;
2450}
2451
2452bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2453                                         SDValue &Offset, bool IsINX) {
2454  if (SelectAddrFrameIndex(Addr, Base, Offset))
2455    return true;
2456
2457 SDLoc DL(Addr);
2458 MVT VT = Addr.getSimpleValueType();
2459
2460 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2461 Base = Addr.getOperand(0);
2462 Offset = Addr.getOperand(1);
2463 return true;
2464 }
2465
2466 int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2467  if (CurDAG->isBaseWithConstantOffset(Addr)) {
2468    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2469 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2470 Base = Addr.getOperand(0);
2471 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2472 SDValue LoOperand = Base.getOperand(1);
2473 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2474 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2475 // (its low part, really), then we can rely on the alignment of that
2476 // variable to provide a margin of safety before low part can overflow
2477 // the 12 bits of the load/store offset. Check if CVal falls within
2478 // that margin; if so (low part + CVal) can't overflow.
2479 const DataLayout &DL = CurDAG->getDataLayout();
2480 Align Alignment = commonAlignment(
2481 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2482 if (CVal == 0 || Alignment > CVal) {
2483 int64_t CombinedOffset = CVal + GA->getOffset();
2484 Base = Base.getOperand(0);
2485            Offset = CurDAG->getTargetGlobalAddress(
2486                GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2487 CombinedOffset, GA->getTargetFlags());
2488 return true;
2489 }
2490 }
2491 }
2492
2493 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2494 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2495 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2496 return true;
2497 }
2498 }
2499
2500 // Handle ADD with large immediates.
2501 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2502 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2503 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2504 "simm12 not already handled?");
2505
2506 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2507 // an ADDI for part of the offset and fold the rest into the load/store.
2508 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
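    // For example (illustrative): an offset of 3000 is split as
    // "addi tmp, base, 2047" with the remaining 953 folded into the
    // load/store immediate.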
2509 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2510 int64_t Adj = CVal < 0 ? -2048 : 2047;
2511 Base = SDValue(
2512 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2513 CurDAG->getTargetConstant(Adj, DL, VT)),
2514 0);
2515 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2516 return true;
2517 }
2518
2519 // For larger immediates, we might be able to save one instruction from
2520 // constant materialization by folding the Lo12 bits of the immediate into
2521 // the address. We should only do this if the ADD is only used by loads and
2522 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2523 // separately with the full materialized immediate creating extra
2524 // instructions.
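    // Illustrative example: if several loads use (add base, 0x12345678),
    // the upper bits can be materialized once and added to the base, while
    // the low 12 bits (0x678) are folded into each load's offset field.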
2525 if (isWorthFoldingAdd(Addr) &&
2526 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2527 Offset)) {
2528 // Insert an ADD instruction with the materialized Hi52 bits.
2529 Base = SDValue(
2530 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2531 0);
2532 return true;
2533 }
2534 }
2535
2536 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2537 return true;
2538
2539 Base = Addr;
2540 Offset = CurDAG->getTargetConstant(0, DL, VT);
2541 return true;
2542}
2543
2544/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2545/// Offset should be all zeros.
2546bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2547                                              SDValue &Offset) {
2548  if (SelectAddrFrameIndex(Addr, Base, Offset))
2549    return true;
2550
2551 SDLoc DL(Addr);
2552 MVT VT = Addr.getSimpleValueType();
2553
2554  if (CurDAG->isBaseWithConstantOffset(Addr)) {
2555    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2556 if (isInt<12>(CVal)) {
2557 Base = Addr.getOperand(0);
2558
2559 // Early-out if not a valid offset.
2560 if ((CVal & 0b11111) != 0) {
2561 Base = Addr;
2562 Offset = CurDAG->getTargetConstant(0, DL, VT);
2563 return true;
2564 }
2565
2566 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2567 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2568 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2569 return true;
2570 }
2571 }
2572
2573 // Handle ADD with large immediates.
2574 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2575 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2576    assert(!isInt<12>(CVal) && "simm12 not already handled?");
2578
2579 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2580 // one instruction by folding adjustment (-2048 or 2016) into the address.
2581 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2582 int64_t Adj = CVal < 0 ? -2048 : 2016;
2583 int64_t AdjustedOffset = CVal - Adj;
2584      Base = SDValue(CurDAG->getMachineNode(
2585                         RISCV::ADDI, DL, VT, Addr.getOperand(0),
2586 CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
2587 0);
2588 Offset = CurDAG->getTargetConstant(Adj, DL, VT);
2589 return true;
2590 }
2591
2592 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2593 Offset, true)) {
2594 // Insert an ADD instruction with the materialized Hi52 bits.
2595 Base = SDValue(
2596 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2597 0);
2598 return true;
2599 }
2600 }
2601
2602 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2603 return true;
2604
2605 Base = Addr;
2606 Offset = CurDAG->getTargetConstant(0, DL, VT);
2607 return true;
2608}
2609
2610bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2611                                        SDValue &ShAmt) {
2612 ShAmt = N;
2613
2614 // Peek through zext.
2615 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2616 ShAmt = ShAmt.getOperand(0);
2617
2618 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2619 // amount. If there is an AND on the shift amount, we can bypass it if it
2620 // doesn't affect any of those bits.
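  // For example (illustrative, RV64): in (sll x, (and y, 63)) the AND does
  // not change bits [5:0], so the shift can use y directly.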
2621 if (ShAmt.getOpcode() == ISD::AND &&
2622 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2623 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2624
2625 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2626 // mask that covers the bits needed to represent all shift amounts.
2627 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2628 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2629
2630 if (ShMask.isSubsetOf(AndMask)) {
2631 ShAmt = ShAmt.getOperand(0);
2632 } else {
2633 // SimplifyDemandedBits may have optimized the mask so try restoring any
2634 // bits that are known zero.
2635 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2636 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2637 return true;
2638 ShAmt = ShAmt.getOperand(0);
2639 }
2640 }
2641
2642 if (ShAmt.getOpcode() == ISD::ADD &&
2643 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2644 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2645 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2646 // to avoid the ADD.
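    // e.g. (illustrative, RV64): (srl x, (add y, 64)) shifts by the same
    // amount as (srl x, y), since only the low 6 bits are read.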
2647 if (Imm != 0 && Imm % ShiftWidth == 0) {
2648 ShAmt = ShAmt.getOperand(0);
2649 return true;
2650 }
2651 } else if (ShAmt.getOpcode() == ISD::SUB &&
2652 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2653 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2654 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2655 // generate a NEG instead of a SUB of a constant.
2656 if (Imm != 0 && Imm % ShiftWidth == 0) {
2657 SDLoc DL(ShAmt);
2658 EVT VT = ShAmt.getValueType();
2659 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2660 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2661 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2662 ShAmt.getOperand(1));
2663 ShAmt = SDValue(Neg, 0);
2664 return true;
2665 }
2666 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2667 // to generate a NOT instead of a SUB of a constant.
2668 if (Imm % ShiftWidth == ShiftWidth - 1) {
2669 SDLoc DL(ShAmt);
2670 EVT VT = ShAmt.getValueType();
2671 MachineSDNode *Not =
2672 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2673 CurDAG->getTargetConstant(-1, DL, VT));
2674 ShAmt = SDValue(Not, 0);
2675 return true;
2676 }
2677 }
2678
2679 return true;
2680}
2681
2682/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2683/// check for equality with 0. This function emits instructions that convert the
2684/// seteq/setne into something that can be compared with 0.
2685/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2686/// ISD::SETNE).
2687bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2688                               SDValue &Val) {
2689 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2690 "Unexpected condition code!");
2691
2692 // We're looking for a setcc.
2693 if (N->getOpcode() != ISD::SETCC)
2694 return false;
2695
2696 // Must be an equality comparison.
2697 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2698 if (CCVal != ExpectedCCVal)
2699 return false;
2700
2701 SDValue LHS = N->getOperand(0);
2702 SDValue RHS = N->getOperand(1);
2703
2704 if (!LHS.getValueType().isScalarInteger())
2705 return false;
2706
2707 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
2708 if (isNullConstant(RHS)) {
2709 Val = LHS;
2710 return true;
2711 }
2712
2713 SDLoc DL(N);
2714
2715 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2716 int64_t CVal = C->getSExtValue();
2717 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2718 // non-zero otherwise.
2719 if (CVal == -2048) {
2720 Val =
2721          SDValue(CurDAG->getMachineNode(
2722              RISCV::XORI, DL, N->getValueType(0), LHS,
2723 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2724 0);
2725 return true;
2726 }
2727 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2728 // LHS is equal to the RHS and non-zero otherwise.
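    // e.g. (illustrative): for (seteq x, 5) this emits (addi x, -5), which
    // is zero exactly when x == 5.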
2729 if (isInt<12>(CVal) || CVal == 2048) {
2730 Val =
2731          SDValue(CurDAG->getMachineNode(
2732              RISCV::ADDI, DL, N->getValueType(0), LHS,
2733 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2734 0);
2735 return true;
2736 }
2737 }
2738
2739 // If nothing else we can XOR the LHS and RHS to produce zero if they are
2740 // equal and a non-zero value if they aren't.
2741 Val = SDValue(
2742 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2743 return true;
2744}
2745
2746bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2747  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2748 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2749 Val = N.getOperand(0);
2750 return true;
2751 }
2752
2753 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2754 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2755 return N;
2756
2757 SDValue N0 = N.getOperand(0);
2758 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2759 N.getConstantOperandVal(1) == ShiftAmt &&
2760 N0.getConstantOperandVal(1) == ShiftAmt)
2761 return N0.getOperand(0);
2762
2763 return N;
2764 };
2765
2766 MVT VT = N.getSimpleValueType();
2767 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2768 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2769 return true;
2770 }
2771
2772 return false;
2773}
2774
2775bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2776  if (N.getOpcode() == ISD::AND) {
2777 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2778 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2779 Val = N.getOperand(0);
2780 return true;
2781 }
2782 }
2783 MVT VT = N.getSimpleValueType();
2784 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2785 if (CurDAG->MaskedValueIsZero(N, Mask)) {
2786 Val = N;
2787 return true;
2788 }
2789
2790 return false;
2791}
2792
2793/// Look for various patterns that can be done with a SHL that can be folded
2794/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2795/// SHXADD we are trying to match.
2796bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2797                                       SDValue &Val) {
2798 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2799 SDValue N0 = N.getOperand(0);
2800
2801 bool LeftShift = N0.getOpcode() == ISD::SHL;
2802 if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2803 isa<ConstantSDNode>(N0.getOperand(1))) {
2804 uint64_t Mask = N.getConstantOperandVal(1);
2805 unsigned C2 = N0.getConstantOperandVal(1);
2806
2807 unsigned XLen = Subtarget->getXLen();
2808 if (LeftShift)
2809 Mask &= maskTrailingZeros<uint64_t>(C2);
2810 else
2811 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2812
2813 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2814      // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
2815 // followed by a SHXADD with c3 for the X amount.
2816 if (isShiftedMask_64(Mask)) {
2817 unsigned Leading = XLen - llvm::bit_width(Mask);
2818 unsigned Trailing = llvm::countr_zero(Mask);
2819 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2820 SDLoc DL(N);
2821 EVT VT = N.getValueType();
2822          Val = SDValue(CurDAG->getMachineNode(
2823                            RISCV::SRLI, DL, VT, N0.getOperand(0),
2824 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2825 0);
2826 return true;
2827 }
2828 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2829      // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2830 // followed by a SHXADD using c3 for the X amount.
2831 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2832 SDLoc DL(N);
2833 EVT VT = N.getValueType();
2834 Val = SDValue(
2835              CurDAG->getMachineNode(
2836                  RISCV::SRLI, DL, VT, N0.getOperand(0),
2837 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2838 0);
2839 return true;
2840 }
2841 }
2842 }
2843 }
2844
2845 bool LeftShift = N.getOpcode() == ISD::SHL;
2846 if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2847 isa<ConstantSDNode>(N.getOperand(1))) {
2848 SDValue N0 = N.getOperand(0);
2849 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2850 isa<ConstantSDNode>(N0.getOperand(1))) {
2851 uint64_t Mask = N0.getConstantOperandVal(1);
2852 if (isShiftedMask_64(Mask)) {
2853 unsigned C1 = N.getConstantOperandVal(1);
2854 unsigned XLen = Subtarget->getXLen();
2855 unsigned Leading = XLen - llvm::bit_width(Mask);
2856 unsigned Trailing = llvm::countr_zero(Mask);
2857 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2858 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2859 if (LeftShift && Leading == 32 && Trailing > 0 &&
2860 (Trailing + C1) == ShAmt) {
2861 SDLoc DL(N);
2862 EVT VT = N.getValueType();
2863          Val = SDValue(CurDAG->getMachineNode(
2864                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
2865 CurDAG->getTargetConstant(Trailing, DL, VT)),
2866 0);
2867 return true;
2868 }
2869 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
2870 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
2871 if (!LeftShift && Leading == 32 && Trailing > C1 &&
2872 (Trailing - C1) == ShAmt) {
2873 SDLoc DL(N);
2874 EVT VT = N.getValueType();
2875          Val = SDValue(CurDAG->getMachineNode(
2876                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
2877 CurDAG->getTargetConstant(Trailing, DL, VT)),
2878 0);
2879 return true;
2880 }
2881 }
2882 }
2883 }
2884
2885 return false;
2886}
2887
2888/// Look for various patterns that can be done with a SHL that can be folded
2889/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
2890/// SHXADD_UW we are trying to match.
2891bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
2892                                          SDValue &Val) {
2893 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
2894 N.hasOneUse()) {
2895 SDValue N0 = N.getOperand(0);
2896 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2897 N0.hasOneUse()) {
2898 uint64_t Mask = N.getConstantOperandVal(1);
2899 unsigned C2 = N0.getConstantOperandVal(1);
2900
2901 Mask &= maskTrailingZeros<uint64_t>(C2);
2902
2903 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
2904 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
2905 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
2906 if (isShiftedMask_64(Mask)) {
2907 unsigned Leading = llvm::countl_zero(Mask);
2908 unsigned Trailing = llvm::countr_zero(Mask);
2909 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
2910 SDLoc DL(N);
2911 EVT VT = N.getValueType();
2912        Val = SDValue(CurDAG->getMachineNode(
2913            RISCV::SLLI, DL, VT, N0.getOperand(0),
2914 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
2915 0);
2916 return true;
2917 }
2918 }
2919 }
2920 }
2921
2922 return false;
2923}
2924
2925static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
2926 unsigned Bits,
2927 const TargetInstrInfo *TII) {
2928 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
2929
2930 if (!MCOpcode)
2931 return false;
2932
2933 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
2934 const uint64_t TSFlags = MCID.TSFlags;
2935 if (!RISCVII::hasSEWOp(TSFlags))
2936 return false;
2937 assert(RISCVII::hasVLOp(TSFlags));
2938
2939 bool HasGlueOp = User->getGluedNode() != nullptr;
2940 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
2941 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
2942 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
2943 unsigned VLIdx =
2944 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
2945 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
2946
2947 if (UserOpNo == VLIdx)
2948 return false;
2949
2950 auto NumDemandedBits =
2951 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
2952 return NumDemandedBits && Bits >= *NumDemandedBits;
2953}
2954
2955// Return true if all users of this SDNode* only consume the lower \p Bits.
2956// This can be used to form W instructions for add/sub/mul/shl even when the
2957// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
2958// SimplifyDemandedBits has made it so some users see a sext_inreg and some
2959// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
2960// the add/sub/mul/shl to become non-W instructions. By checking the users we
2961// may be able to use a W instruction and CSE with the other instruction if
2962// this has happened. We could try to detect that the CSE opportunity exists
2963// before doing this, but that would be more complicated.
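// Illustrative example: an i64 ADD whose only users are an SW store and an
// ADDW only has its low 32 bits observed, so the ADD itself can be selected
// as ADDW and potentially CSE with an existing ADDW.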
2964bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
2965                                        const unsigned Depth) const {
2966 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
2967 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
2968 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
2969 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
2970 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
2971 isa<ConstantSDNode>(Node) || Depth != 0) &&
2972 "Unexpected opcode");
2973
2974  if (Depth >= SelectionDAG::MaxRecursionDepth)
2975    return false;
2976
2977 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
2978 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
2979 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
2980 return false;
2981
2982 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
2983 SDNode *User = *UI;
2984 // Users of this node should have already been instruction selected
2985 if (!User->isMachineOpcode())
2986 return false;
2987
2988 // TODO: Add more opcodes?
2989 switch (User->getMachineOpcode()) {
2990 default:
2991 if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
2992 break;
2993 return false;
2994 case RISCV::ADDW:
2995 case RISCV::ADDIW:
2996 case RISCV::SUBW:
2997 case RISCV::MULW:
2998 case RISCV::SLLW:
2999 case RISCV::SLLIW:
3000 case RISCV::SRAW:
3001 case RISCV::SRAIW:
3002 case RISCV::SRLW:
3003 case RISCV::SRLIW:
3004 case RISCV::DIVW:
3005 case RISCV::DIVUW:
3006 case RISCV::REMW:
3007 case RISCV::REMUW:
3008 case RISCV::ROLW:
3009 case RISCV::RORW:
3010 case RISCV::RORIW:
3011 case RISCV::CLZW:
3012 case RISCV::CTZW:
3013 case RISCV::CPOPW:
3014 case RISCV::SLLI_UW:
3015 case RISCV::FMV_W_X:
3016 case RISCV::FCVT_H_W:
3017 case RISCV::FCVT_H_WU:
3018 case RISCV::FCVT_S_W:
3019 case RISCV::FCVT_S_WU:
3020 case RISCV::FCVT_D_W:
3021 case RISCV::FCVT_D_WU:
3022 case RISCV::TH_REVW:
3023 case RISCV::TH_SRRIW:
3024 if (Bits < 32)
3025 return false;
3026 break;
3027 case RISCV::SLL:
3028 case RISCV::SRA:
3029 case RISCV::SRL:
3030 case RISCV::ROL:
3031 case RISCV::ROR:
3032 case RISCV::BSET:
3033 case RISCV::BCLR:
3034 case RISCV::BINV:
3035 // Shift amount operands only use log2(Xlen) bits.
3036 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
3037 return false;
3038 break;
3039 case RISCV::SLLI:
3040 // SLLI only uses the lower (XLen - ShAmt) bits.
3041 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
3042 return false;
3043 break;
3044 case RISCV::ANDI:
3045 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3046 break;
3047 goto RecCheck;
3048 case RISCV::ORI: {
3049 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3050 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3051 break;
3052 [[fallthrough]];
3053 }
3054 case RISCV::AND:
3055 case RISCV::OR:
3056 case RISCV::XOR:
3057 case RISCV::XORI:
3058 case RISCV::ANDN:
3059 case RISCV::ORN:
3060 case RISCV::XNOR:
3061 case RISCV::SH1ADD:
3062 case RISCV::SH2ADD:
3063 case RISCV::SH3ADD:
3064 RecCheck:
3065 if (hasAllNBitUsers(User, Bits, Depth + 1))
3066 break;
3067 return false;
3068 case RISCV::SRLI: {
3069 unsigned ShAmt = User->getConstantOperandVal(1);
3070 // If we are shifting right by less than Bits, and users don't demand any
3071 // bits that were shifted into [Bits-1:0], then we can consider this as an
3072 // N-Bit user.
3073 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3074 break;
3075 return false;
3076 }
3077 case RISCV::SEXT_B:
3078 case RISCV::PACKH:
3079 if (Bits < 8)
3080 return false;
3081 break;
3082 case RISCV::SEXT_H:
3083 case RISCV::FMV_H_X:
3084 case RISCV::ZEXT_H_RV32:
3085 case RISCV::ZEXT_H_RV64:
3086 case RISCV::PACKW:
3087 if (Bits < 16)
3088 return false;
3089 break;
3090 case RISCV::PACK:
3091 if (Bits < (Subtarget->getXLen() / 2))
3092 return false;
3093 break;
3094 case RISCV::ADD_UW:
3095 case RISCV::SH1ADD_UW:
3096 case RISCV::SH2ADD_UW:
3097 case RISCV::SH3ADD_UW:
3098 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3099 // 32 bits.
3100 if (UI.getOperandNo() != 0 || Bits < 32)
3101 return false;
3102 break;
3103 case RISCV::SB:
3104 if (UI.getOperandNo() != 0 || Bits < 8)
3105 return false;
3106 break;
3107 case RISCV::SH:
3108 if (UI.getOperandNo() != 0 || Bits < 16)
3109 return false;
3110 break;
3111 case RISCV::SW:
3112 if (UI.getOperandNo() != 0 || Bits < 32)
3113 return false;
3114 break;
3115 }
3116 }
3117
3118 return true;
3119}
3120
3121// Select a constant that can be represented as (sign_extend(imm5) << imm2).
3122bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3123                                      SDValue &Shl2) {
3124 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3125 int64_t Offset = C->getSExtValue();
3126 int64_t Shift;
3127 for (Shift = 0; Shift < 4; Shift++)
3128 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3129 break;
3130
3131 // Constant cannot be encoded.
3132 if (Shift == 4)
3133 return false;
3134
3135 EVT Ty = N->getValueType(0);
3136 Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3137 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3138 return true;
3139 }
3140
3141 return false;
3142}
3143
3144// Select VL as a 5 bit immediate or a value that will become a register. This
3145// allows us to choose between VSETIVLI or VSETVLI later.
3146bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3147  auto *C = dyn_cast<ConstantSDNode>(N);
3148 if (C && isUInt<5>(C->getZExtValue())) {
3149 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3150 N->getValueType(0));
3151 } else if (C && C->isAllOnes()) {
3152 // Treat all ones as VLMax.
3153    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3154                                   N->getValueType(0));
3155 } else if (isa<RegisterSDNode>(N) &&
3156 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3157 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3158 // as the register class. Convert X0 to a special immediate to pass the
3159 // MachineVerifier. This is recognized specially by the vsetvli insertion
3160 // pass.
3161    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3162                                   N->getValueType(0));
3163 } else {
3164 VL = N;
3165 }
3166
3167 return true;
3168}
3169
3170static SDValue findVSplat(SDValue N) {
3171  if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3172 if (!N.getOperand(0).isUndef())
3173 return SDValue();
3174 N = N.getOperand(1);
3175 }
3176 SDValue Splat = N;
3177 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3178 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3179 !Splat.getOperand(0).isUndef())
3180 return SDValue();
3181 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3182 return Splat;
3183}
3184
3185bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3186  SDValue Splat = findVSplat(N);
3187  if (!Splat)
3188 return false;
3189
3190 SplatVal = Splat.getOperand(1);
3191 return true;
3192}
3193
3194static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3195                                  SelectionDAG &DAG,
3196 const RISCVSubtarget &Subtarget,
3197 std::function<bool(int64_t)> ValidateImm) {
3198  SDValue Splat = findVSplat(N);
3199  if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3200 return false;
3201
3202 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3203 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3204 "Unexpected splat operand type");
3205
3206 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3207 // type is wider than the resulting vector element type: an implicit
3208 // truncation first takes place. Therefore, perform a manual
3209 // truncation/sign-extension in order to ignore any truncated bits and catch
3210 // any zero-extended immediate.
3211 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3212 // sign-extending to (XLenVT -1).
3213 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3214
3215 int64_t SplatImm = SplatConst.getSExtValue();
3216
3217 if (!ValidateImm(SplatImm))
3218 return false;
3219
3220 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3221 return true;
3222}
3223
3224bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3225  return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3226 [](int64_t Imm) { return isInt<5>(Imm); });
3227}
3228
3229bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3230  return selectVSplatImmHelper(
3231 N, SplatVal, *CurDAG, *Subtarget,
3232 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3233}
3234
3235bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3236                                                      SDValue &SplatVal) {
3237 return selectVSplatImmHelper(
3238 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3239 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3240 });
3241}
3242
3243bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3244                                         SDValue &SplatVal) {
3245 return selectVSplatImmHelper(
3246 N, SplatVal, *CurDAG, *Subtarget,
3247 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3248}
3249
3250bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3251  // Truncates are custom lowered during legalization.
3252 auto IsTrunc = [this](SDValue N) {
3253 if (N->getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
3254 return false;
3255 SDValue VL;
3256 selectVLOp(N->getOperand(2), VL);
3257 // Any vmset_vl is ok, since any bits past VL are undefined and we can
3258 // assume they are set.
3259 return N->getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
3260 isa<ConstantSDNode>(VL) &&
3261 cast<ConstantSDNode>(VL)->getSExtValue() == RISCV::VLMaxSentinel;
3262 };
3263
3264 // We can have multiple nested truncates, so unravel them all if needed.
3265 while (N->getOpcode() == ISD::SIGN_EXTEND ||
3266 N->getOpcode() == ISD::ZERO_EXTEND || IsTrunc(N)) {
3267 if (!N.hasOneUse() ||
3268 N.getValueType().getSizeInBits().getKnownMinValue() < 8)
3269 return false;
3270 N = N->getOperand(0);
3271 }
3272
3273 return selectVSplat(N, SplatVal);
3274}
3275
3276bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3277  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3278 if (!CFP)
3279 return false;
3280 const APFloat &APF = CFP->getValueAPF();
3281 // td can handle +0.0 already.
3282 if (APF.isPosZero())
3283 return false;
3284
3285 MVT VT = CFP->getSimpleValueType(0);
3286
3287 // Even if this FPImm requires an additional FNEG (i.e. the second element of
3288 // the returned pair is true) we still prefer FLI + FNEG over immediate
3289 // materialization as the latter might generate a longer instruction sequence.
3290 if (static_cast<const RISCVTargetLowering *>(TLI)
3291 ->getLegalZfaFPImm(APF, VT)
3292 .first >= 0)
3293 return false;
3294
3295 MVT XLenVT = Subtarget->getXLenVT();
3296 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3297 assert(APF.isNegZero() && "Unexpected constant.");
3298 return false;
3299 }
3300 SDLoc DL(N);
3301 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3302 *Subtarget);
3303 return true;
3304}
3305
3306bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3307                                       SDValue &Imm) {
3308 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3309 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3310
3311 if (!isInt<5>(ImmVal))
3312 return false;
3313
3314 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3315 return true;
3316 }
3317
3318 return false;
3319}
3320
3321// Try to remove sext.w if the input is a W instruction or can be made into
3322// a W instruction cheaply.
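// e.g. (illustrative): "add a0, a1, a2 ; sext.w a0, a0" becomes
// "addw a0, a1, a2", and a sext.w whose input is already an ADDW is simply
// removed.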
3323bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3324 // Look for the sext.w pattern, addiw rd, rs1, 0.
3325 if (N->getMachineOpcode() != RISCV::ADDIW ||
3326 !isNullConstant(N->getOperand(1)))
3327 return false;
3328
3329 SDValue N0 = N->getOperand(0);
3330 if (!N0.isMachineOpcode())
3331 return false;
3332
3333 switch (N0.getMachineOpcode()) {
3334 default:
3335 break;
3336 case RISCV::ADD:
3337 case RISCV::ADDI:
3338 case RISCV::SUB:
3339 case RISCV::MUL:
3340 case RISCV::SLLI: {
3341 // Convert sext.w+add/sub/mul to their W instructions. This will create
3342 // a new independent instruction. This improves latency.
3343 unsigned Opc;
3344 switch (N0.getMachineOpcode()) {
3345 default:
3346 llvm_unreachable("Unexpected opcode!");
3347 case RISCV::ADD: Opc = RISCV::ADDW; break;
3348 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3349 case RISCV::SUB: Opc = RISCV::SUBW; break;
3350 case RISCV::MUL: Opc = RISCV::MULW; break;
3351 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3352 }
3353
3354 SDValue N00 = N0.getOperand(0);
3355 SDValue N01 = N0.getOperand(1);
3356
3357 // Shift amount needs to be uimm5.
3358 if (N0.getMachineOpcode() == RISCV::SLLI &&
3359 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3360 break;
3361
3362 SDNode *Result =
3363 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3364 N00, N01);
3365 ReplaceUses(N, Result);
3366 return true;
3367 }
3368 case RISCV::ADDW:
3369 case RISCV::ADDIW:
3370 case RISCV::SUBW:
3371 case RISCV::MULW:
3372 case RISCV::SLLIW:
3373 case RISCV::PACKW:
3374 case RISCV::TH_MULAW:
3375 case RISCV::TH_MULAH:
3376 case RISCV::TH_MULSW:
3377 case RISCV::TH_MULSH:
3378 if (N0.getValueType() == MVT::i32)
3379 break;
3380
3381 // Result is already sign extended; just remove the sext.w.
3382 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3383 ReplaceUses(N, N0.getNode());
3384 return true;
3385 }
3386
3387 return false;
3388}
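// Illustrative example (assembly is approximate, not taken from this file):
// the peephole above rewrites
//
//   add   a0, a1, a2
//   addiw a0, a0, 0        # sext.w
//
// into a single
//
//   addw  a0, a1, a2
//
// and, when the producer is already a W-form instruction (addw, mulw, ...),
// it simply deletes the redundant sext.w.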
3389
3390static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3391 // Check that we're using V0 as a mask register.
3392 if (!isa<RegisterSDNode>(MaskOp) ||
3393 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3394 return false;
3395
3396 // The glued user defines V0.
3397 const auto *Glued = GlueOp.getNode();
3398
3399 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3400 return false;
3401
3402 // Check that we're defining V0 as a mask register.
3403 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3404 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3405 return false;
3406
3407 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3408 SDValue MaskSetter = Glued->getOperand(2);
3409
3410 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3411 // from an extract_subvector or insert_subvector.
3412 if (MaskSetter->isMachineOpcode() &&
3413 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3414 MaskSetter = MaskSetter->getOperand(0);
3415
3416 const auto IsVMSet = [](unsigned Opc) {
3417 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3418 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3419 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3420 Opc == RISCV::PseudoVMSET_M_B8;
3421 };
3422
3423 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3424 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3425 // assume that it's all-ones? Same applies to its VL.
3426 return MaskSetter->isMachineOpcode() &&
3427 IsVMSet(MaskSetter.getMachineOpcode());
3428}
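// Sketch of the DAG shape this helper matches (node names and operand lists
// are illustrative only, not taken from this file):
//
//   t1: mask    = PseudoVMSET_M_B8 vl, sew      (possibly wrapped in
//                                                COPY_TO_REGCLASS)
//   t2: ch,glue = CopyToReg entry, Register:$v0, t1
//   t3: ...     = SomeMaskedPseudo ..., Register:$v0, ..., t2:1
//
// MaskOp is the Register:$v0 operand of t3, GlueOp is the glue result of t2,
// and the check succeeds because the value copied into V0 is a VMSET.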
3429
3430 // Return true if we can prove that the mask operand of N is an all-ones mask.
3431static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3432 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3433 N->getOperand(N->getNumOperands() - 1));
3434}
3435
3436static bool isImplicitDef(SDValue V) {
3437 return V.isMachineOpcode() &&
3438 V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3439}
3440
3441// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3442// corresponding "unmasked" pseudo versions. The mask we're interested in will
3443// take the form of a V0 physical register operand, with a glued
3444// register-setting instruction.
3445bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3446 const RISCV::RISCVMaskedPseudoInfo *I =
3447 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3448 if (!I)
3449 return false;
3450
3451 unsigned MaskOpIdx = I->MaskOpIdx;
3452 if (!usesAllOnesMask(N, MaskOpIdx))
3453 return false;
3454
3455 // There are two classes of pseudos in the table - compares and
3456 // everything else. See the comment on RISCVMaskedPseudo for details.
3457 const unsigned Opc = I->UnmaskedPseudo;
3458 const MCInstrDesc &MCID = TII->get(Opc);
3459 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3460#ifndef NDEBUG
3461 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3464 "Masked and unmasked pseudos are inconsistent");
3465 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3466 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3467#endif
3468
3469 SmallVector<SDValue, 8> Ops;
3470 // Skip the merge operand at index 0 if !UseTUPseudo.
3471 for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3472 // Skip the mask, and the Glue.
3473 SDValue Op = N->getOperand(I);
3474 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3475 continue;
3476 Ops.push_back(Op);
3477 }
3478
3479 // Transitively apply any node glued to our new node.
3480 const auto *Glued = N->getGluedNode();
3481 if (auto *TGlued = Glued->getGluedNode())
3482 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3483
3485 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3486
3487 if (!N->memoperands_empty())
3488 CurDAG->setNodeMemRefs(Result, N->memoperands());
3489
3490 Result->setFlags(N->getFlags());
3491 ReplaceUses(N, Result);
3492
3493 return true;
3494}
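// Illustrative rewrite (pseudo names and operand lists are approximate, not
// taken from this file): given a masked pseudo whose V0 operand is known to
// be all ones, e.g.
//
//   ... = PseudoVADD_VV_M1_MASK merge, a, b, Register:$v0, vl, sew, policy
//
// the peephole re-emits the unmasked form, dropping the mask operand and the
// glue, and dropping the merge operand as well when the unmasked pseudo has
// no policy operand:
//
//   ... = PseudoVADD_VV_M1 merge, a, b, vl, sew, policy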
3495
3496static bool IsVMerge(SDNode *N) {
3497 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3498}
3499
3500static bool IsVMv(SDNode *N) {
3501 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
3502}
3503
3504static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
3505 switch (LMUL) {
3506 case RISCVII::LMUL_F8:
3507 return RISCV::PseudoVMSET_M_B1;
3508 case RISCVII::LMUL_F4:
3509 return RISCV::PseudoVMSET_M_B2;
3510 case RISCVII::LMUL_F2:
3511 return RISCV::PseudoVMSET_M_B4;
3512 case RISCVII::LMUL_1:
3513 return RISCV::PseudoVMSET_M_B8;
3514 case RISCVII::LMUL_2:
3515 return RISCV::PseudoVMSET_M_B16;
3516 case RISCVII::LMUL_4:
3517 return RISCV::PseudoVMSET_M_B32;
3518 case RISCVII::LMUL_8:
3519 return RISCV::PseudoVMSET_M_B64;
3520 case RISCVII::LMUL_RESERVED:
3521 llvm_unreachable("Unexpected LMUL");
3522 }
3523 llvm_unreachable("Unknown VLMUL enum");
3524}
3525
3526// Try to fold away VMERGE_VVM instructions into their true operands:
3527//
3528// %true = PseudoVADD_VV ...
3529// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3530// ->
3531// %x = PseudoVADD_VV_MASK %false, ..., %mask
3532//
3533// We can only fold if vmerge's merge operand, vmerge's false operand and
3534// %true's merge operand (if it has one) are the same. This is because we have
3535// to consolidate them into one merge operand in the result.
3536//
3537// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3538// mask is all ones.
3539//
3540// We can also fold a VMV_V_V into its true operand, since it is equivalent to a
3541// VMERGE_VVM with an all ones mask.
3542//
3543// The resulting VL is the minimum of the two VLs.
3544//
3545// The resulting policy is the effective policy the vmerge would have had,
3546 // i.e. whether or not its merge operand was implicit-def.
3547bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3548 SDValue Merge, False, True, VL, Mask, Glue;
3549 // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
3550 if (IsVMv(N)) {
3551 Merge = N->getOperand(0);
3552 False = N->getOperand(0);
3553 True = N->getOperand(1);
3554 VL = N->getOperand(2);
3555 // A vmv.v.v won't have a Mask or Glue; instead we'll construct an all-ones
3556 // mask later below.
3557 } else {
3558 assert(IsVMerge(N));
3559 Merge = N->getOperand(0);
3560 False = N->getOperand(1);
3561 True = N->getOperand(2);
3562 Mask = N->getOperand(3);
3563 VL = N->getOperand(4);
3564 // We always have a glue node for the mask at v0.
3565 Glue = N->getOperand(N->getNumOperands() - 1);
3566 }
3567 assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3568 assert(!Glue || Glue.getValueType() == MVT::Glue);
3569
3570 // We require that either merge and false are the same, or that merge
3571 // is undefined.
3572 if (Merge != False && !isImplicitDef(Merge))
3573 return false;
3574
3575 assert(True.getResNo() == 0 &&
3576 "Expect True is the first output of an instruction.");
3577
3578 // N must be the only user of True.
3579 if (!True.hasOneUse())
3580 return false;
3581
3582 if (!True.isMachineOpcode())
3583 return false;
3584
3585 unsigned TrueOpc = True.getMachineOpcode();
3586 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3587 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3588 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3589
3590 bool IsMasked = false;
3591 const RISCV::RISCVMaskedPseudoInfo *Info =
3592 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3593 if (!Info && HasTiedDest) {
3594 Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3595 IsMasked = true;
3596 }
3597
3598 if (!Info)
3599 return false;
3600
3601 // When Mask is not a true mask, this transformation is illegal for some
3602 // operations whose results are affected by mask, like viota.m.
3603 if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
3604 return false;
3605
3606 // If True has a merge operand then it needs to be the same as vmerge's False,
3607 // since False will be used for the result's merge operand.
3608 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3609 // The vmerge instruction must be TU.
3610 // FIXME: This could be relaxed, but we need to handle the policy for the
3611 // resulting op correctly.
3612 if (isImplicitDef(Merge))
3613 return false;
3614 SDValue MergeOpTrue = True->getOperand(0);
3615 if (False != MergeOpTrue)
3616 return false;
3617 }
3618
3619 // If True is masked then the vmerge must have an all 1s mask, since we're
3620 // going to keep the mask from True.
3621 if (IsMasked) {
3622 assert(HasTiedDest && "Expected tied dest");
3623 // The vmerge instruction must be TU.
3624 if (isImplicitDef(Merge))
3625 return false;
3626 // FIXME: Support mask agnostic True instruction which would have an
3627 // undef merge operand.
3628 if (Mask && !usesAllOnesMask(Mask, Glue))
3629 return false;
3630 }
3631
3632 // Skip if True has side effect.
3633 // TODO: Support vleff and vlsegff.
3634 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3635 return false;
3636
3637 // The last operand of a masked instruction may be glued.
3638 bool HasGlueOp = True->getGluedNode() != nullptr;
3639
3640 // The chain operand may exist either before the glued operands or in the last
3641 // position.
3642 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3643 bool HasChainOp =
3644 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3645
3646 if (HasChainOp) {
3647 // Avoid creating cycles in the DAG. We must ensure that none of the other
3648 // operands depend on True through its Chain.
3649 SmallVector<const SDNode *, 4> LoopWorklist;
3650 SmallPtrSet<const SDNode *, 16> Visited;
3651 LoopWorklist.push_back(False.getNode());
3652 if (Mask)
3653 LoopWorklist.push_back(Mask.getNode());
3654 LoopWorklist.push_back(VL.getNode());
3655 if (Glue)
3656 LoopWorklist.push_back(Glue.getNode());
3657 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3658 return false;
3659 }
3660
3661 // The vector policy operand may be present for masked intrinsics
3662 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3663 unsigned TrueVLIndex =
3664 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3665 SDValue TrueVL = True.getOperand(TrueVLIndex);
3666 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3667
3668 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3669 if (LHS == RHS)
3670 return LHS;
3671 if (isAllOnesConstant(LHS))
3672 return RHS;
3673 if (isAllOnesConstant(RHS))
3674 return LHS;
3675 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3676 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3677 if (!CLHS || !CRHS)
3678 return SDValue();
3679 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3680 };
3681
3682 // Because N and True must have the same merge operand (or True's operand is
3683 // implicit_def), the "effective" body is the minimum of their VLs.
3684 SDValue OrigVL = VL;
3685 VL = GetMinVL(TrueVL, VL);
3686 if (!VL)
3687 return false;
3688
3689 // If we end up changing the VL or mask of True, then we need to make sure it
3690 // doesn't raise any observable fp exceptions, since changing the active
3691 // elements will affect how fflags is set.
3692 if (TrueVL != VL || !IsMasked)
3693 if (mayRaiseFPException(True.getNode()) &&
3694 !True->getFlags().hasNoFPExcept())
3695 return false;
3696
3697 SDLoc DL(N);
3698
3699 // From the preconditions we checked above, we know the mask and thus glue
3700 // for the result node will be taken from True.
3701 if (IsMasked) {
3702 Mask = True->getOperand(Info->MaskOpIdx);
3703 Glue = True->getOperand(True->getNumOperands() - 1);
3704 assert(Glue.getValueType() == MVT::Glue);
3705 }
3706 // If we end up using the vmerge's own mask but the vmerge is actually a
3707 // vmv.v.v (which has no mask operand), create an all-ones mask to use.
3708 else if (IsVMv(N)) {
3709 unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
3710 unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
3711 ElementCount EC = N->getValueType(0).getVectorElementCount();
3712 MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
3713
3714 SDValue AllOnesMask =
3715 SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
3716 SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
3717 RISCV::V0, AllOnesMask, SDValue());
3718 Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
3719 Glue = MaskCopy.getValue(1);
3720 }
3721
3722 unsigned MaskedOpc = Info->MaskedPseudo;
3723#ifndef NDEBUG
3724 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3726 "Expected instructions with mask have policy operand.");
3727 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3728 MCOI::TIED_TO) == 0 &&
3729 "Expected instructions with mask have a tied dest.");
3730#endif
3731
3732 // Use a tumu policy, relaxing it to tail agnostic provided that the merge
3733 // operand is undefined.
3734 //
3735 // However, if the VL became smaller than what the vmerge had originally, then
3736 // elements past VL that were previously in the vmerge's body will have moved
3737 // to the tail. In that case we always need to use tail undisturbed to
3738 // preserve them.
3739 bool MergeVLShrunk = VL != OrigVL;
3740 uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
3741 ? RISCVII::TAIL_AGNOSTIC
3742 : /*TUMU*/ 0;
3743 SDValue PolicyOp =
3744 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3745
3746
3747 SmallVector<SDValue, 8> Ops;
3748 Ops.push_back(False);
3749
3750 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3751 const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3752 assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3753 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3754
3755 Ops.push_back(Mask);
3756
3757 // For an unmasked "VOp" with a rounding mode operand, the interface is
3758 // (..., rm, vl) or (..., rm, vl, policy).
3759 // Its masked version is (..., vm, rm, vl, policy).
3760 // See the rounding mode pseudos in RISCVInstrInfoVPseudos.td.
3761 if (HasRoundingMode)
3762 Ops.push_back(True->getOperand(TrueVLIndex - 1));
3763
3764 Ops.append({VL, SEW, PolicyOp});
3765
3766 // Result node should have chain operand of True.
3767 if (HasChainOp)
3768 Ops.push_back(True.getOperand(TrueChainOpIdx));
3769
3770 // Add the glue for the CopyToReg of mask->v0.
3771 Ops.push_back(Glue);
3772
3773 MachineSDNode *Result =
3774 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3775 Result->setFlags(True->getFlags());
3776
3777 if (!cast<MachineSDNode>(True)->memoperands_empty())
3778 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
3779
3780 // Replace the vmerge.vvm node with Result.
3781 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3782
3783 // Replace the other values of True, e.g. the chain and VL.
3784 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3785 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3786
3787 return true;
3788}
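// Illustrative shape of the vmv.v.v variant of this fold (pseudo names and
// operand order are approximate, not taken from this file):
//
//   %true = PseudoVADD_VV_M1 %passthru, %a, %b, %avl, sew, policy
//   %x    = PseudoVMV_V_V_M1 %passthru, %true, %avl, sew
// -->
//   %x    = PseudoVADD_VV_M1_MASK %passthru, %a, %b, $v0, %avl, sew, policy
//
// where $v0 has just been set to all ones via the PseudoVMSET built above and
// the resulting VL is the smaller of the two original VLs.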
3789
3790bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3791 bool MadeChange = false;
3792 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3793
3794 while (Position != CurDAG->allnodes_begin()) {
3795 SDNode *N = &*--Position;
3796 if (N->use_empty() || !N->isMachineOpcode())
3797 continue;
3798
3799 if (IsVMerge(N) || IsVMv(N))
3800 MadeChange |= performCombineVMergeAndVOps(N);
3801 }
3802 return MadeChange;
3803}
3804
3805 /// If our passthru is an implicit_def, use noreg instead. This sidesteps
3806 /// issues with MachineCSE not being able to CSE expressions with
3807 /// IMPLICIT_DEF operands while preserving the semantic intent. See
3808 /// pr64282 for context. Note that this transform is the last one
3809 /// performed during DAG-to-DAG instruction selection.
3810bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
3811 bool MadeChange = false;
3812 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3813
3814 while (Position != CurDAG->allnodes_begin()) {
3815 SDNode *N = &*--Position;
3816 if (N->use_empty() || !N->isMachineOpcode())
3817 continue;
3818
3819 const unsigned Opc = N->getMachineOpcode();
3820 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
3821 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
3822 !isImplicitDef(N->getOperand(0)))
3823 continue;
3824
3825 SmallVector<SDValue> Ops;
3826 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
3827 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
3828 SDValue Op = N->getOperand(I);
3829 Ops.push_back(Op);
3830 }
3831
3832 MachineSDNode *Result =
3833 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3834 Result->setFlags(N->getFlags());
3835 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
3836 ReplaceUses(N, Result);
3837 MadeChange = true;
3838 }
3839 return MadeChange;
3840}
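// Illustrative rewrite (pseudo name and operand list are approximate, chain
// operands omitted, not taken from this file):
//
//   t1: v = IMPLICIT_DEF
//   t2: v = PseudoVLE32_V_M1 t1, ptr, avl, sew, policy
// -->
//   t2: v = PseudoVLE32_V_M1 $noreg, ptr, avl, sew, policy
//
// With the IMPLICIT_DEF passthru replaced by $noreg, MachineCSE can treat
// otherwise-identical expressions as equal.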
3841
3842
3843// This pass converts a legalized DAG into a RISCV-specific DAG, ready
3844// for instruction scheduling.
3845 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
3846 CodeGenOptLevel OptLevel) {
3847 return new RISCVDAGToDAGISel(TM, OptLevel);
3848}
3849
3850char RISCVDAGToDAGISel::ID = 0;
3851
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
R600 Clause Merge
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget)
#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b)
static bool isWorthFoldingAdd(SDValue Add)
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq)
static bool isImplicitDef(SDValue V)
static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL)
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)
static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp)
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, unsigned Bits, const TargetInstrInfo *TII)
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, bool IsPrefetch=false)
static bool IsVMv(SDNode *N)
static cl::opt< bool > UsePseudoMovImm("riscv-use-rematerializable-movimm", cl::Hidden, cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " "constant materialization"), cl::init(false))
#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)
static SDValue findVSplat(SDValue N)
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, std::function< bool(int64_t)> ValidateImm)
static bool IsVMerge(SDNode *N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define PASS_NAME
Value * RHS
Value * LHS
bool isZero() const
Definition: APFloat.h:1291
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
bool isPosZero() const
Definition: APFloat.h:1306
bool isNegZero() const
Definition: APFloat.h:1307
Class for arbitrary precision integers.
Definition: APInt.h:76
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1433
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1229
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1507
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
int64_t getSExtValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
This class is used to form a handle around another node that is persistent and is updated across invo...
static StringRef getMemConstraintName(ConstraintCode C)
Definition: InlineAsm.h:467
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by other flags.
Definition: MCInstrDesc.h:463
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
void selectVSSEG(SDNode *Node, bool IsMasked, bool IsStrided)
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset)
void selectVLSEGFF(SDNode *Node, bool IsMasked)
bool selectFPImm(SDValue N, SDValue &Imm)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
void selectVLXSEG(SDNode *Node, bool IsMasked, bool IsOrdered)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
void selectVSXSEG(SDNode *Node, bool IsMasked, bool IsOrdered)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset, bool IsINX=false)
void selectVLSEG(SDNode *Node, bool IsMasked, bool IsStrided)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
unsigned getXLen() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
const RISCVRegisterInfo * getRegisterInfo() const override
const RISCVTargetLowering * getTargetLowering() const override
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVII::VLMUL getLMUL(MVT VT)
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
op_iterator op_begin() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
const TargetLowering * TLI
MachineFunction * MF
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
bool mayRaiseFPException(SDNode *Node) const
Return whether the node may raise an FP exception.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:551
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:531
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:532
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:727
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getRegister(unsigned Reg, EVT VT)
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:560
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
ilist< SDNode >::iterator allnodes_iterator
Definition: SelectionDAG.h:534
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
TargetInstrInfo - Interface to description of machine instruction set.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:332
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:559
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1239
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1029
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1219
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1235
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1461
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1512
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1557
static bool hasRoundModeOp(uint64_t TSFlags)
static VLMUL getLMul(uint64_t TSFlags)
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
static unsigned decodeVSEW(unsigned VSEW)
unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul)
unsigned encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic)
std::optional< unsigned > getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW)
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
static const MachineMemOperand::Flags MONontemporalBit1
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:228
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
static const MachineMemOperand::Flags MONontemporalBit0
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:258
unsigned M1(unsigned Val)
Definition: VE.h:376
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:246
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
@ Add
Sum of integers.
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:373
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:149
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
bool hasNoFPExcept() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.