RISCVISelDAGToDAG.cpp
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
21#include "llvm/IR/IntrinsicsRISCV.h"
23#include "llvm/Support/Debug.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "riscv-isel"
30#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
31
33 "riscv-use-rematerializable-movimm", cl::Hidden,
34 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
35 "constant materialization"),
36 cl::init(false));
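// When enabled, a two-instruction materialization (e.g. LUI+ADDIW) is kept as a
// single rematerializable PseudoMovImm so the register allocator can recompute
// the constant instead of spilling it; see the UsePseudoMovImm check in selectImm.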
37
38namespace llvm::RISCV {
39#define GET_RISCVVSSEGTable_IMPL
40#define GET_RISCVVLSEGTable_IMPL
41#define GET_RISCVVLXSEGTable_IMPL
42#define GET_RISCVVSXSEGTable_IMPL
43#define GET_RISCVVLETable_IMPL
44#define GET_RISCVVSETable_IMPL
45#define GET_RISCVVLXTable_IMPL
46#define GET_RISCVVSXTable_IMPL
47#include "RISCVGenSearchableTables.inc"
48} // namespace llvm::RISCV
49
50void RISCVDAGToDAGISel::PreprocessISelDAG() {
51 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
52
53 bool MadeChange = false;
54 while (Position != CurDAG->allnodes_begin()) {
55 SDNode *N = &*--Position;
56 if (N->use_empty())
57 continue;
58
59 SDValue Result;
60 switch (N->getOpcode()) {
61 case ISD::SPLAT_VECTOR: {
62 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
63 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
64 MVT VT = N->getSimpleValueType(0);
65 unsigned Opc =
66 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
67 SDLoc DL(N);
68 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
69 SDValue Src = N->getOperand(0);
70 if (VT.isInteger())
71 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
72 N->getOperand(0));
73 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
74 break;
75 }
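// For example, an integer splat becomes
//   (VMV_V_X_VL undef, (any_extend x), X0)
// where the X0 VL operand encodes VLMAX; FP splats use VFMV_V_F_VL instead.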
76 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
77 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
78 // load. Done after lowering and combining so that we have a chance to
79 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
80 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
81 MVT VT = N->getSimpleValueType(0);
82 SDValue Passthru = N->getOperand(0);
83 SDValue Lo = N->getOperand(1);
84 SDValue Hi = N->getOperand(2);
85 SDValue VL = N->getOperand(3);
86 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
87 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
88 "Unexpected VTs!");
90 SDLoc DL(N);
91
92 // Create temporary stack for each expanding node.
93 SDValue StackSlot =
94 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
95 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
96 MachinePointerInfo MPI =
97 MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), FI);
98 SDValue Chain = CurDAG->getEntryNode();
99 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
100
101 SDValue OffsetSlot =
102 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
103 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
104 Align(8));
105
106 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
107
108 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
109 SDValue IntID =
110 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
111 SDValue Ops[] = {Chain,
112 IntID,
113 Passthru,
114 StackSlot,
115 CurDAG->getRegister(RISCV::X0, MVT::i64),
116 VL};
117
118 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
119 MVT::i64, MPI, Align(8),
120 MachineMemOperand::MOLoad);
121 break;
122 }
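// The expansion above stores Lo and Hi into an 8-byte stack slot and reloads it
// with a stride-0 vlse64 (stride register X0), so every element of the result
// observes the same 64-bit value without requiring a 64-bit GPR on RV32.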
123 }
124
125 if (Result) {
126 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
127 LLVM_DEBUG(N->dump(CurDAG));
128 LLVM_DEBUG(dbgs() << "\nNew: ");
129 LLVM_DEBUG(Result->dump(CurDAG));
130 LLVM_DEBUG(dbgs() << "\n");
131
132 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
133 MadeChange = true;
134 }
135 }
136
137 if (MadeChange)
138 CurDAG->RemoveDeadNodes();
139}
140
141void RISCVDAGToDAGISel::PostprocessISelDAG() {
142 HandleSDNode Dummy(CurDAG->getRoot());
143 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
144
145 bool MadeChange = false;
146 while (Position != CurDAG->allnodes_begin()) {
147 SDNode *N = &*--Position;
148 // Skip dead nodes and any non-machine opcodes.
149 if (N->use_empty() || !N->isMachineOpcode())
150 continue;
151
152 MadeChange |= doPeepholeSExtW(N);
153
154 // FIXME: This is here only because the VMerge transform doesn't
155 // know how to handle masked true inputs. Once that has been moved
156 // to post-ISEL, this can be deleted as well.
157 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
158 }
159
160 CurDAG->setRoot(Dummy.getValue());
161
162 MadeChange |= doPeepholeMergeVVMFold();
163
164 // After we're done with everything else, convert IMPLICIT_DEF
165 // passthru operands to NoRegister. This is required to workaround
166 // an optimization deficiency in MachineCSE. This really should
167 // be merged back into each of the patterns (i.e. there's no good
168 // reason not to go directly to NoReg), but is being done this way
169 // to allow easy backporting.
170 MadeChange |= doPeepholeNoRegPassThru();
171
172 if (MadeChange)
173 CurDAG->RemoveDeadNodes();
174}
175
176static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
177 RISCVMatInt::InstSeq &Seq) {
178 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
179 for (const RISCVMatInt::Inst &Inst : Seq) {
180 SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
181 SDNode *Result = nullptr;
182 switch (Inst.getOpndKind()) {
183 case RISCVMatInt::Imm:
184 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
185 break;
186 case RISCVMatInt::RegX0:
187 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
188 CurDAG->getRegister(RISCV::X0, VT));
189 break;
190 case RISCVMatInt::RegReg:
191 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
192 break;
193 case RISCVMatInt::RegImm:
194 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
195 break;
196 }
197
198 // Only the first instruction has X0 as its source.
199 SrcReg = SDValue(Result, 0);
200 }
201
202 return SrcReg;
203}
204
205static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
206 int64_t Imm, const RISCVSubtarget &Subtarget) {
207 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
208
209 // Use a rematerializable pseudo instruction for short sequences if enabled.
210 if (Seq.size() == 2 && UsePseudoMovImm)
211 return SDValue(
212 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
213 CurDAG->getTargetConstant(Imm, DL, VT)),
214 0);
215
216 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
217 // worst an LUI+ADDIW. This will require an extra register, but avoids a
218 // constant pool.
219 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
220 // low and high 32 bits are the same and bit 31 and 63 are set.
221 if (Seq.size() > 3) {
222 unsigned ShiftAmt, AddOpc;
223 RISCVMatInt::InstSeq SeqLo =
224 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
225 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
226 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
227
228 SDValue SLLI = SDValue(
229 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
230 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
231 0);
232 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
233 }
234 }
235
236 // Otherwise, use the original sequence.
237 return selectImmSeq(CurDAG, DL, VT, Seq);
238}
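// Note the ordering above: the PseudoMovImm form is only used for exactly two
// instructions, and the (ADD (SLLI X, C), X) form is only used when SeqLo plus
// the SLLI/ADD pair is strictly shorter than the direct sequence, e.g. turning a
// 5-instruction materialization into LUI+ADDIW+SLLI+ADD.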
239
240static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
241 unsigned NF, RISCVII::VLMUL LMUL) {
242 static const unsigned M1TupleRegClassIDs[] = {
243 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
244 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
245 RISCV::VRN8M1RegClassID};
246 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
247 RISCV::VRN3M2RegClassID,
248 RISCV::VRN4M2RegClassID};
249
250 assert(Regs.size() >= 2 && Regs.size() <= 8);
251
252 unsigned RegClassID;
253 unsigned SubReg0;
254 switch (LMUL) {
255 default:
256 llvm_unreachable("Invalid LMUL.");
257 case RISCVII::VLMUL::LMUL_F8:
258 case RISCVII::VLMUL::LMUL_F4:
259 case RISCVII::VLMUL::LMUL_F2:
260 case RISCVII::VLMUL::LMUL_1:
261 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
262 "Unexpected subreg numbering");
263 SubReg0 = RISCV::sub_vrm1_0;
264 RegClassID = M1TupleRegClassIDs[NF - 2];
265 break;
266 case RISCVII::VLMUL::LMUL_2:
267 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
268 "Unexpected subreg numbering");
269 SubReg0 = RISCV::sub_vrm2_0;
270 RegClassID = M2TupleRegClassIDs[NF - 2];
271 break;
272 case RISCVII::VLMUL::LMUL_4:
273 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
274 "Unexpected subreg numbering");
275 SubReg0 = RISCV::sub_vrm4_0;
276 RegClassID = RISCV::VRN2M4RegClassID;
277 break;
278 }
279
280 SDLoc DL(Regs[0]);
281 SmallVector<SDValue, 8> Ops;
282
283 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
284
285 for (unsigned I = 0; I < Regs.size(); ++I) {
286 Ops.push_back(Regs[I]);
287 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
288 }
289 SDNode *N =
290 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
291 return SDValue(N, 0);
292}
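// The resulting REG_SEQUENCE takes the tuple register class ID first, followed
// by (value, subregister index) pairs, e.g. for NF=2 at LMUL=1:
//   (REG_SEQUENCE VRN2M1, val0, sub_vrm1_0, val1, sub_vrm1_1)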
293
294void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
295 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
296 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
297 bool IsLoad, MVT *IndexVT) {
298 SDValue Chain = Node->getOperand(0);
299 SDValue Glue;
300
301 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
302
303 if (IsStridedOrIndexed) {
304 Operands.push_back(Node->getOperand(CurOp++)); // Index.
305 if (IndexVT)
306 *IndexVT = Operands.back()->getSimpleValueType(0);
307 }
308
309 if (IsMasked) {
310 // Mask needs to be copied to V0.
311 SDValue Mask = Node->getOperand(CurOp++);
312 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
313 Glue = Chain.getValue(1);
314 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
315 }
316 SDValue VL;
317 selectVLOp(Node->getOperand(CurOp++), VL);
318 Operands.push_back(VL);
319
320 MVT XLenVT = Subtarget->getXLenVT();
321 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
322 Operands.push_back(SEWOp);
323
324 // At the IR layer, all the masked load intrinsics have policy operands,
325 // none of the others do. All have passthru operands. For our pseudos,
326 // all loads have policy operands.
327 if (IsLoad) {
328 uint64_t Policy = RISCVII::MASK_AGNOSTIC;
329 if (IsMasked)
330 Policy = Node->getConstantOperandVal(CurOp++);
331 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
332 Operands.push_back(PolicyOp);
333 }
334
335 Operands.push_back(Chain); // Chain.
336 if (Glue)
337 Operands.push_back(Glue);
338}
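// Operand order produced here: base pointer, optional stride/index, optional
// mask (copied into V0 and passed as a V0 register use), VL, SEW (as log2), a
// policy immediate for loads, then the chain and, if a mask was copied, the glue.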
339
340void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
341 bool IsStrided) {
342 SDLoc DL(Node);
343 unsigned NF = Node->getNumValues() - 1;
344 MVT VT = Node->getSimpleValueType(0);
345 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
346 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
347
348 unsigned CurOp = 2;
349 SmallVector<SDValue, 8> Operands;
350
351 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
352 Node->op_begin() + CurOp + NF);
353 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
354 Operands.push_back(Merge);
355 CurOp += NF;
356
357 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
358 Operands, /*IsLoad=*/true);
359
360 const RISCV::VLSEGPseudo *P =
361 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
362 static_cast<unsigned>(LMUL));
363 MachineSDNode *Load =
364 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
365
366 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
367 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
368
369 SDValue SuperReg = SDValue(Load, 0);
370 for (unsigned I = 0; I < NF; ++I) {
371 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
372 ReplaceUses(SDValue(Node, I),
373 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
374 }
375
376 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
377 CurDAG->RemoveDeadNode(Node);
378}
379
380void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
381 SDLoc DL(Node);
382 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
383 MVT VT = Node->getSimpleValueType(0);
384 MVT XLenVT = Subtarget->getXLenVT();
385 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
386 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
387
388 unsigned CurOp = 2;
389 SmallVector<SDValue, 8> Operands;
390
391 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
392 Node->op_begin() + CurOp + NF);
393 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
394 Operands.push_back(MaskedOff);
395 CurOp += NF;
396
397 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
398 /*IsStridedOrIndexed*/ false, Operands,
399 /*IsLoad=*/true);
400
401 const RISCV::VLSEGPseudo *P =
402 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
403 Log2SEW, static_cast<unsigned>(LMUL));
404 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
405 XLenVT, MVT::Other, Operands);
406
407 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
408 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
409
410 SDValue SuperReg = SDValue(Load, 0);
411 for (unsigned I = 0; I < NF; ++I) {
412 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
413 ReplaceUses(SDValue(Node, I),
414 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
415 }
416
417 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
418 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
419 CurDAG->RemoveDeadNode(Node);
420}
421
422void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
423 bool IsOrdered) {
424 SDLoc DL(Node);
425 unsigned NF = Node->getNumValues() - 1;
426 MVT VT = Node->getSimpleValueType(0);
427 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
428 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
429
430 unsigned CurOp = 2;
431 SmallVector<SDValue, 8> Operands;
432
433 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
434 Node->op_begin() + CurOp + NF);
435 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
436 Operands.push_back(MaskedOff);
437 CurOp += NF;
438
439 MVT IndexVT;
440 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
441 /*IsStridedOrIndexed*/ true, Operands,
442 /*IsLoad=*/true, &IndexVT);
443
445 "Element count mismatch");
446
447 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
448 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
449 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
450 report_fatal_error("The V extension does not support EEW=64 for index "
451 "values when XLEN=32");
452 }
453 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
454 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
455 static_cast<unsigned>(IndexLMUL));
456 MachineSDNode *Load =
457 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
458
459 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
460 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
461
462 SDValue SuperReg = SDValue(Load, 0);
463 for (unsigned I = 0; I < NF; ++I) {
464 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
465 ReplaceUses(SDValue(Node, I),
466 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
467 }
468
469 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
470 CurDAG->RemoveDeadNode(Node);
471}
472
473void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
474 bool IsStrided) {
475 SDLoc DL(Node);
476 unsigned NF = Node->getNumOperands() - 4;
477 if (IsStrided)
478 NF--;
479 if (IsMasked)
480 NF--;
481 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
482 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
483 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
484 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
485 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
486
487 SmallVector<SDValue, 8> Operands;
488 Operands.push_back(StoreVal);
489 unsigned CurOp = 2 + NF;
490
491 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
492 Operands);
493
494 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
495 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
496 MachineSDNode *Store =
497 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
498
499 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
500 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
501
502 ReplaceNode(Node, Store);
503}
504
505void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
506 bool IsOrdered) {
507 SDLoc DL(Node);
508 unsigned NF = Node->getNumOperands() - 5;
509 if (IsMasked)
510 --NF;
511 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
512 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
513 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
514 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
515 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
516
517 SmallVector<SDValue, 8> Operands;
518 Operands.push_back(StoreVal);
519 unsigned CurOp = 2 + NF;
520
521 MVT IndexVT;
522 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
523 /*IsStridedOrIndexed*/ true, Operands,
524 /*IsLoad=*/false, &IndexVT);
525
527 "Element count mismatch");
528
529 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
530 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
531 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
532 report_fatal_error("The V extension does not support EEW=64 for index "
533 "values when XLEN=32");
534 }
535 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
536 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
537 static_cast<unsigned>(IndexLMUL));
538 MachineSDNode *Store =
539 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
540
541 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
542 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
543
544 ReplaceNode(Node, Store);
545}
546
547void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
548 if (!Subtarget->hasVInstructions())
549 return;
550
551 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
552
553 SDLoc DL(Node);
554 MVT XLenVT = Subtarget->getXLenVT();
555
556 unsigned IntNo = Node->getConstantOperandVal(0);
557
558 assert((IntNo == Intrinsic::riscv_vsetvli ||
559 IntNo == Intrinsic::riscv_vsetvlimax) &&
560 "Unexpected vsetvli intrinsic");
561
562 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
563 unsigned Offset = (VLMax ? 1 : 2);
564
565 assert(Node->getNumOperands() == Offset + 2 &&
566 "Unexpected number of operands");
567
568 unsigned SEW =
569 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
570 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
571 Node->getConstantOperandVal(Offset + 1) & 0x7);
572
573 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
574 /*MaskAgnostic*/ true);
575 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
576
577 SDValue VLOperand;
578 unsigned Opcode = RISCV::PseudoVSETVLI;
579 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
580 if (auto VLEN = Subtarget->getRealVLen())
581 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
582 VLMax = true;
583 }
584 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
585 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
586 Opcode = RISCV::PseudoVSETVLIX0;
587 } else {
588 VLOperand = Node->getOperand(1);
589
590 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
591 uint64_t AVL = C->getZExtValue();
592 if (isUInt<5>(AVL)) {
593 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
594 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
595 XLenVT, VLImm, VTypeIOp));
596 return;
597 }
598 }
599 }
600
601 ReplaceNode(Node,
602 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
603}
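// In summary: an AVL that is all-ones or provably equal to VLMAX selects
// PseudoVSETVLIX0 with X0 as the AVL operand, a constant AVL that fits in uimm5
// selects PseudoVSETIVLI, and everything else selects PseudoVSETVLI with the AVL
// in a register. VTYPE is always encoded as tail-agnostic and mask-agnostic here.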
604
605bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
606 MVT VT = Node->getSimpleValueType(0);
607 unsigned Opcode = Node->getOpcode();
608 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
609 "Unexpected opcode");
610 SDLoc DL(Node);
611
612 // For operations of the form (x << C1) op C2, check if we can use
613 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
614 SDValue N0 = Node->getOperand(0);
615 SDValue N1 = Node->getOperand(1);
616
617 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
618 if (!Cst)
619 return false;
620
621 int64_t Val = Cst->getSExtValue();
622
623 // Check if immediate can already use ANDI/ORI/XORI.
624 if (isInt<12>(Val))
625 return false;
626
627 SDValue Shift = N0;
628
629 // If Val is simm32 and we have a sext_inreg from i32, then the binop
630 // produces at least 33 sign bits. We can peek through the sext_inreg and use
631 // a SLLIW at the end.
632 bool SignExt = false;
633 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
634 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
635 SignExt = true;
636 Shift = N0.getOperand(0);
637 }
638
639 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
640 return false;
641
642 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
643 if (!ShlCst)
644 return false;
645
646 uint64_t ShAmt = ShlCst->getZExtValue();
647
648 // Make sure that we don't change the operation by removing bits.
649 // This only matters for OR and XOR, AND is unaffected.
650 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
651 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
652 return false;
653
654 int64_t ShiftedVal = Val >> ShAmt;
655 if (!isInt<12>(ShiftedVal))
656 return false;
657
658 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
659 if (SignExt && ShAmt >= 32)
660 return false;
661
662 // Ok, we can reorder to get a smaller immediate.
663 unsigned BinOpc;
664 switch (Opcode) {
665 default: llvm_unreachable("Unexpected opcode");
666 case ISD::AND: BinOpc = RISCV::ANDI; break;
667 case ISD::OR: BinOpc = RISCV::ORI; break;
668 case ISD::XOR: BinOpc = RISCV::XORI; break;
669 }
670
671 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
672
673 SDNode *BinOp =
674 CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
675 CurDAG->getTargetConstant(ShiftedVal, DL, VT));
676 SDNode *SLLI =
677 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
678 CurDAG->getTargetConstant(ShAmt, DL, VT));
679 ReplaceNode(Node, SLLI);
680 return true;
681}
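// For example, (and (shl x, 8), 0xFF00) is reassociated into
//   (SLLI (ANDI x, 0xFF), 8)
// so a 12-bit immediate can be used instead of materializing 0xFF00.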
682
683bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
684 // Only supported with XTHeadBb at the moment.
685 if (!Subtarget->hasVendorXTHeadBb())
686 return false;
687
688 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
689 if (!N1C)
690 return false;
691
692 SDValue N0 = Node->getOperand(0);
693 if (!N0.hasOneUse())
694 return false;
695
696 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
697 MVT VT) {
698 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
699 CurDAG->getTargetConstant(Msb, DL, VT),
700 CurDAG->getTargetConstant(Lsb, DL, VT));
701 };
702
703 SDLoc DL(Node);
704 MVT VT = Node->getSimpleValueType(0);
705 const unsigned RightShAmt = N1C->getZExtValue();
706
707 // Transform (sra (shl X, C1) C2) with C1 < C2
708 // -> (TH.EXT X, msb, lsb)
709 if (N0.getOpcode() == ISD::SHL) {
710 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
711 if (!N01C)
712 return false;
713
714 const unsigned LeftShAmt = N01C->getZExtValue();
715 // Make sure that this is a bitfield extraction (i.e., the shift-right
716 // amount can not be less than the left-shift).
717 if (LeftShAmt > RightShAmt)
718 return false;
719
720 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
721 const unsigned Msb = MsbPlusOne - 1;
722 const unsigned Lsb = RightShAmt - LeftShAmt;
723
724 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
725 ReplaceNode(Node, TH_EXT);
726 return true;
727 }
728
729 // Transform (sra (sext_inreg X, _), C) ->
730 // (TH.EXT X, msb, lsb)
731 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
732 unsigned ExtSize =
733 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
734
735 // ExtSize of 32 should use sraiw via tablegen pattern.
736 if (ExtSize == 32)
737 return false;
738
739 const unsigned Msb = ExtSize - 1;
740 const unsigned Lsb = RightShAmt;
741
742 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
743 ReplaceNode(Node, TH_EXT);
744 return true;
745 }
746
747 return false;
748}
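// For example, on RV64 (sra (shl x, 48), 52) extracts the signed bitfield
// x[15:4] and is selected as TH.EXT x, 15, 4.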
749
750bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
751 // Target does not support indexed loads.
752 if (!Subtarget->hasVendorXTHeadMemIdx())
753 return false;
754
755 LoadSDNode *Ld = cast<LoadSDNode>(Node);
756 ISD::MemIndexedMode AM = Ld->getAddressingMode();
757 if (AM == ISD::UNINDEXED)
758 return false;
759
760 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
761 if (!C)
762 return false;
763
764 EVT LoadVT = Ld->getMemoryVT();
765 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
766 "Unexpected addressing mode");
767 bool IsPre = AM == ISD::PRE_INC;
768 bool IsPost = AM == ISD::POST_INC;
769 int64_t Offset = C->getSExtValue();
770
771 // The constants that can be encoded in the THeadMemIdx instructions
772 // are of the form (sign_extend(imm5) << imm2).
773 int64_t Shift;
774 for (Shift = 0; Shift < 4; Shift++)
775 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
776 break;
777
778 // Constant cannot be encoded.
779 if (Shift == 4)
780 return false;
781
782 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
783 unsigned Opcode;
784 if (LoadVT == MVT::i8 && IsPre)
785 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
786 else if (LoadVT == MVT::i8 && IsPost)
787 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
788 else if (LoadVT == MVT::i16 && IsPre)
789 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
790 else if (LoadVT == MVT::i16 && IsPost)
791 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
792 else if (LoadVT == MVT::i32 && IsPre)
793 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
794 else if (LoadVT == MVT::i32 && IsPost)
795 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
796 else if (LoadVT == MVT::i64 && IsPre)
797 Opcode = RISCV::TH_LDIB;
798 else if (LoadVT == MVT::i64 && IsPost)
799 Opcode = RISCV::TH_LDIA;
800 else
801 return false;
802
803 EVT Ty = Ld->getOffset().getValueType();
804 SDValue Ops[] = {Ld->getBasePtr(),
805 CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
806 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
807 Ld->getChain()};
808 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
809 Ld->getValueType(1), MVT::Other, Ops);
810
811 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
812 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
813
814 ReplaceNode(Node, New);
815
816 return true;
817}
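// The offset must be expressible as a 5-bit signed immediate scaled by 1, 2, 4
// or 8; the loop above picks the smallest usable scale, e.g. an offset of 48 is
// encoded as 12 << 2.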
818
819void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
820 if (!Subtarget->hasVInstructions())
821 return;
822
823 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
824
825 SDLoc DL(Node);
826 unsigned IntNo = Node->getConstantOperandVal(1);
827
828 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
829 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
830 "Unexpected vsetvli intrinsic");
831
832 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
833 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
834 SDValue SEWOp =
835 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
836 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
837 Node->getOperand(4), Node->getOperand(5),
838 Node->getOperand(8), SEWOp,
839 Node->getOperand(0)};
840
841 unsigned Opcode;
842 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
843 switch (LMulSDNode->getSExtValue()) {
844 case 5:
845 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
846 : RISCV::PseudoVC_I_SE_MF8;
847 break;
848 case 6:
849 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
850 : RISCV::PseudoVC_I_SE_MF4;
851 break;
852 case 7:
853 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
854 : RISCV::PseudoVC_I_SE_MF2;
855 break;
856 case 0:
857 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
858 : RISCV::PseudoVC_I_SE_M1;
859 break;
860 case 1:
861 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
862 : RISCV::PseudoVC_I_SE_M2;
863 break;
864 case 2:
865 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
866 : RISCV::PseudoVC_I_SE_M4;
867 break;
868 case 3:
869 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
870 : RISCV::PseudoVC_I_SE_M8;
871 break;
872 }
873
874 ReplaceNode(Node, CurDAG->getMachineNode(
875 Opcode, DL, Node->getSimpleValueType(0), Operands));
876}
877
878void RISCVDAGToDAGISel::Select(SDNode *Node) {
879 // If we have a custom node, we have already selected.
880 if (Node->isMachineOpcode()) {
881 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
882 Node->setNodeId(-1);
883 return;
884 }
885
886 // Instruction Selection not handled by the auto-generated tablegen selection
887 // should be handled here.
888 unsigned Opcode = Node->getOpcode();
889 MVT XLenVT = Subtarget->getXLenVT();
890 SDLoc DL(Node);
891 MVT VT = Node->getSimpleValueType(0);
892
893 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
894
895 switch (Opcode) {
896 case ISD::Constant: {
897 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
898 auto *ConstNode = cast<ConstantSDNode>(Node);
899 if (ConstNode->isZero()) {
900 SDValue New =
901 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
902 ReplaceNode(Node, New.getNode());
903 return;
904 }
905 int64_t Imm = ConstNode->getSExtValue();
906 // If only the lower 8 bits are used, try to convert this to a simm6 by
907 // sign-extending bit 7. This is neutral without the C extension, and
908 // allows C.LI to be used if C is present.
909 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
910 Imm = SignExtend64<8>(Imm);
911 // If the upper XLen-16 bits are not used, try to convert this to a simm12
912 // by sign extending bit 15.
913 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
914 hasAllHUsers(Node))
915 Imm = SignExtend64<16>(Imm);
916 // If the upper 32-bits are not used try to convert this into a simm32 by
917 // sign extending bit 32.
918 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
919 Imm = SignExtend64<32>(Imm);
920
921 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
922 return;
923 }
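// For example, if only the low 8 bits of the result are used, the constant 0xFF
// is narrowed to -1, which fits a 6-bit immediate and can be emitted as C.LI.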
924 case ISD::ConstantFP: {
925 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
926 auto [FPImm, NeedsFNeg] =
927 static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
928 VT);
929 if (FPImm >= 0) {
930 unsigned Opc;
931 unsigned FNegOpc;
932 switch (VT.SimpleTy) {
933 default:
934 llvm_unreachable("Unexpected size");
935 case MVT::f16:
936 Opc = RISCV::FLI_H;
937 FNegOpc = RISCV::FSGNJN_H;
938 break;
939 case MVT::f32:
940 Opc = RISCV::FLI_S;
941 FNegOpc = RISCV::FSGNJN_S;
942 break;
943 case MVT::f64:
944 Opc = RISCV::FLI_D;
945 FNegOpc = RISCV::FSGNJN_D;
946 break;
947 }
948 SDNode *Res = CurDAG->getMachineNode(
949 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
950 if (NeedsFNeg)
951 Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
952 SDValue(Res, 0));
953
954 ReplaceNode(Node, Res);
955 return;
956 }
957
958 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
959 SDValue Imm;
960 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
961 // create an integer immediate.
962 if (APF.isPosZero() || NegZeroF64)
963 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
964 else
965 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
966 *Subtarget);
967
968 bool HasZdinx = Subtarget->hasStdExtZdinx();
969 bool Is64Bit = Subtarget->is64Bit();
970 unsigned Opc;
971 switch (VT.SimpleTy) {
972 default:
973 llvm_unreachable("Unexpected size");
974 case MVT::bf16:
975 assert(Subtarget->hasStdExtZfbfmin());
976 Opc = RISCV::FMV_H_X;
977 break;
978 case MVT::f16:
979 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
980 break;
981 case MVT::f32:
982 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
983 break;
984 case MVT::f64:
985 // For RV32, we can't move from a GPR, we need to convert instead. This
986 // should only happen for +0.0 and -0.0.
987 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
988 if (Is64Bit)
989 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
990 else
991 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
992 break;
993 }
994
995 SDNode *Res;
996 if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
997 Res = CurDAG->getMachineNode(
998 Opc, DL, VT, Imm,
999 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1000 else
1001 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1002
1003 // For f64 -0.0, we need to insert a fneg.d idiom.
1004 if (NegZeroF64) {
1005 Opc = RISCV::FSGNJN_D;
1006 if (HasZdinx)
1007 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1008 Res =
1009 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1010 }
1011
1012 ReplaceNode(Node, Res);
1013 return;
1014 }
1015 case RISCVISD::BuildPairF64: {
1016 if (!Subtarget->hasStdExtZdinx())
1017 break;
1018
1019 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1020
1021 SDValue Ops[] = {
1022 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1023 Node->getOperand(0),
1024 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1025 Node->getOperand(1),
1026 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1027
1028 SDNode *N =
1029 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);
1030 ReplaceNode(Node, N);
1031 return;
1032 }
1033 case RISCVISD::SplitF64: {
1034 if (Subtarget->hasStdExtZdinx()) {
1035 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1036
1037 if (!SDValue(Node, 0).use_empty()) {
1038 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,
1039 Node->getOperand(0));
1040 ReplaceUses(SDValue(Node, 0), Lo);
1041 }
1042
1043 if (!SDValue(Node, 1).use_empty()) {
1044 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,
1045 Node->getOperand(0));
1046 ReplaceUses(SDValue(Node, 1), Hi);
1047 }
1048
1049 CurDAG->RemoveDeadNode(Node);
1050 return;
1051 }
1052
1053 if (!Subtarget->hasStdExtZfa())
1054 break;
1055 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1056 "Unexpected subtarget");
1057
1058 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1059 if (!SDValue(Node, 0).use_empty()) {
1060 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1061 Node->getOperand(0));
1062 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1063 }
1064 if (!SDValue(Node, 1).use_empty()) {
1065 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1066 Node->getOperand(0));
1067 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1068 }
1069
1070 CurDAG->RemoveDeadNode(Node);
1071 return;
1072 }
1073 case ISD::SHL: {
1074 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1075 if (!N1C)
1076 break;
1077 SDValue N0 = Node->getOperand(0);
1078 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1079 !isa<ConstantSDNode>(N0.getOperand(1)))
1080 break;
1081 unsigned ShAmt = N1C->getZExtValue();
1082 uint64_t Mask = N0.getConstantOperandVal(1);
1083
1084 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
1085 // 32 leading zeros and C3 trailing zeros.
1086 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1087 unsigned XLen = Subtarget->getXLen();
1088 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1089 unsigned TrailingZeros = llvm::countr_zero(Mask);
1090 if (TrailingZeros > 0 && LeadingZeros == 32) {
1091 SDNode *SRLIW = CurDAG->getMachineNode(
1092 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1093 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1094 SDNode *SLLI = CurDAG->getMachineNode(
1095 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1096 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1097 ReplaceNode(Node, SLLI);
1098 return;
1099 }
1100 }
1101 break;
1102 }
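// For example, (shl (and x, 0xFFFFFFF0), 2) is selected as (SLLI (SRLIW x, 4), 6):
// the SRLIW clears both the upper 32 bits and the low 4 bits, so the mask is
// never materialized in a register.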
1103 case ISD::SRL: {
1104 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1105 if (!N1C)
1106 break;
1107 SDValue N0 = Node->getOperand(0);
1108 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1109 break;
1110 unsigned ShAmt = N1C->getZExtValue();
1111 uint64_t Mask = N0.getConstantOperandVal(1);
1112
1113 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1114 // 32 leading zeros and C3 trailing zeros.
1115 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1116 unsigned XLen = Subtarget->getXLen();
1117 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1118 unsigned TrailingZeros = llvm::countr_zero(Mask);
1119 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1120 SDNode *SRLIW = CurDAG->getMachineNode(
1121 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1122 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1123 SDNode *SLLI = CurDAG->getMachineNode(
1124 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1125 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1126 ReplaceNode(Node, SLLI);
1127 return;
1128 }
1129 }
1130
1131 // Optimize (srl (and X, C2), C) ->
1132 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1133 // Where C2 is a mask with C3 trailing ones.
1134 // Taking into account that the C2 may have had lower bits unset by
1135 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1136 // This pattern occurs when type legalizing right shifts for types with
1137 // less than XLen bits.
1138 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1139 if (!isMask_64(Mask))
1140 break;
1141 unsigned TrailingOnes = llvm::countr_one(Mask);
1142 if (ShAmt >= TrailingOnes)
1143 break;
1144 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1145 if (TrailingOnes == 32) {
1146 SDNode *SRLI = CurDAG->getMachineNode(
1147 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1148 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1149 ReplaceNode(Node, SRLI);
1150 return;
1151 }
1152
1153 // Only do the remaining transforms if the AND has one use.
1154 if (!N0.hasOneUse())
1155 break;
1156
1157 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1158 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1159 SDNode *BEXTI = CurDAG->getMachineNode(
1160 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1161 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1162 ReplaceNode(Node, BEXTI);
1163 return;
1164 }
1165
1166 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1167 SDNode *SLLI =
1168 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1169 CurDAG->getTargetConstant(LShAmt, DL, VT));
1170 SDNode *SRLI = CurDAG->getMachineNode(
1171 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1172 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1173 ReplaceNode(Node, SRLI);
1174 return;
1175 }
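// For example, when the AND has a single use, (srl (and x, 0xFFFF), 8) is
// selected as (SRLI (SLLI x, 48), 56), avoiding materialization of 0xFFFF.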
1176 case ISD::SRA: {
1177 if (trySignedBitfieldExtract(Node))
1178 return;
1179
1180 // Optimize (sra (sext_inreg X, i16), C) ->
1181 // (srai (slli X, (XLen-16), (XLen-16) + C)
1182 // And (sra (sext_inreg X, i8), C) ->
1183 // (srai (slli X, (XLen-8), (XLen-8) + C)
1184 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1185 // This transform matches the code we get without Zbb. The shifts are more
1186 // compressible, and this can help expose CSE opportunities in the sdiv by
1187 // constant optimization.
1188 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1189 if (!N1C)
1190 break;
1191 SDValue N0 = Node->getOperand(0);
1192 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1193 break;
1194 unsigned ShAmt = N1C->getZExtValue();
1195 unsigned ExtSize =
1196 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1197 // ExtSize of 32 should use sraiw via tablegen pattern.
1198 if (ExtSize >= 32 || ShAmt >= ExtSize)
1199 break;
1200 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1201 SDNode *SLLI =
1202 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1203 CurDAG->getTargetConstant(LShAmt, DL, VT));
1204 SDNode *SRAI = CurDAG->getMachineNode(
1205 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1206 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1207 ReplaceNode(Node, SRAI);
1208 return;
1209 }
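// For example, on RV64 (sra (sext_inreg x, i16), 3) is selected as
// (SRAI (SLLI x, 48), 51), matching the expansion used when Zbb is unavailable.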
1210 case ISD::OR:
1211 case ISD::XOR:
1212 if (tryShrinkShlLogicImm(Node))
1213 return;
1214
1215 break;
1216 case ISD::AND: {
1217 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1218 if (!N1C)
1219 break;
1220
1221 SDValue N0 = Node->getOperand(0);
1222
1223 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1224 SDValue X, unsigned Msb,
1225 unsigned Lsb) {
1226 if (!Subtarget->hasVendorXTHeadBb())
1227 return false;
1228
1229 SDNode *TH_EXTU = CurDAG->getMachineNode(
1230 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1231 CurDAG->getTargetConstant(Lsb, DL, VT));
1232 ReplaceNode(Node, TH_EXTU);
1233 return true;
1234 };
1235
1236 bool LeftShift = N0.getOpcode() == ISD::SHL;
1237 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1238 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1239 if (!C)
1240 break;
1241 unsigned C2 = C->getZExtValue();
1242 unsigned XLen = Subtarget->getXLen();
1243 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1244
1245 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1246 // shift pair might offer more compression opportunities.
1247 // TODO: We could check for C extension here, but we don't have many lit
1248 // tests with the C extension enabled so not checking gets better
1249 // coverage.
1250 // TODO: What if ANDI faster than shift?
1251 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1252
1253 uint64_t C1 = N1C->getZExtValue();
1254
1255 // Clear irrelevant bits in the mask.
1256 if (LeftShift)
1257 C1 &= maskTrailingZeros<uint64_t>(C2);
1258 else
1259 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1260
1261 // Some transforms should only be done if the shift has a single use or
1262 // the AND would become (srli (slli X, 32), 32)
1263 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1264
1265 SDValue X = N0.getOperand(0);
1266
1267 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1268 // with c3 leading zeros.
1269 if (!LeftShift && isMask_64(C1)) {
1270 unsigned Leading = XLen - llvm::bit_width(C1);
1271 if (C2 < Leading) {
1272 // If the number of leading zeros is C2+32 this can be SRLIW.
1273 if (C2 + 32 == Leading) {
1274 SDNode *SRLIW = CurDAG->getMachineNode(
1275 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1276 ReplaceNode(Node, SRLIW);
1277 return;
1278 }
1279
1280 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1281 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1282 //
1283 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1284 // legalized and goes through DAG combine.
1285 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1286 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1287 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1288 SDNode *SRAIW =
1289 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1290 CurDAG->getTargetConstant(31, DL, VT));
1291 SDNode *SRLIW = CurDAG->getMachineNode(
1292 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1293 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1294 ReplaceNode(Node, SRLIW);
1295 return;
1296 }
1297
1298 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1299 // available.
1300 // Transform (and (srl x, C2), C1)
1301 // -> (<bfextract> x, msb, lsb)
1302 //
1303 // Make sure to keep this below the SRLIW cases, as we always want to
1304 // prefer the more common instruction.
1305 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1306 const unsigned Lsb = C2;
1307 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1308 return;
1309
1310 // (srli (slli x, c3-c2), c3).
1311 // Skip if we could use (zext.w (sraiw X, C2)).
1312 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1313 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1314 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1315 // Also Skip if we can use bexti or th.tst.
1316 Skip |= HasBitTest && Leading == XLen - 1;
1317 if (OneUseOrZExtW && !Skip) {
1318 SDNode *SLLI = CurDAG->getMachineNode(
1319 RISCV::SLLI, DL, VT, X,
1320 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1321 SDNode *SRLI = CurDAG->getMachineNode(
1322 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1323 CurDAG->getTargetConstant(Leading, DL, VT));
1324 ReplaceNode(Node, SRLI);
1325 return;
1326 }
1327 }
1328 }
1329
1330 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1331 // shifted by c2 bits with c3 leading zeros.
1332 if (LeftShift && isShiftedMask_64(C1)) {
1333 unsigned Leading = XLen - llvm::bit_width(C1);
1334
1335 if (C2 + Leading < XLen &&
1336 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1337 // Use slli.uw when possible.
1338 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1339 SDNode *SLLI_UW =
1340 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1341 CurDAG->getTargetConstant(C2, DL, VT));
1342 ReplaceNode(Node, SLLI_UW);
1343 return;
1344 }
1345
1346 // (srli (slli c2+c3), c3)
1347 if (OneUseOrZExtW && !IsCANDI) {
1348 SDNode *SLLI = CurDAG->getMachineNode(
1349 RISCV::SLLI, DL, VT, X,
1350 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1351 SDNode *SRLI = CurDAG->getMachineNode(
1352 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1353 CurDAG->getTargetConstant(Leading, DL, VT));
1354 ReplaceNode(Node, SRLI);
1355 return;
1356 }
1357 }
1358 }
1359
1360 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1361 // shifted mask with c2 leading zeros and c3 trailing zeros.
1362 if (!LeftShift && isShiftedMask_64(C1)) {
1363 unsigned Leading = XLen - llvm::bit_width(C1);
1364 unsigned Trailing = llvm::countr_zero(C1);
1365 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1366 !IsCANDI) {
1367 unsigned SrliOpc = RISCV::SRLI;
1368 // If the input is zexti32 we should use SRLIW.
1369 if (X.getOpcode() == ISD::AND &&
1370 isa<ConstantSDNode>(X.getOperand(1)) &&
1371 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1372 SrliOpc = RISCV::SRLIW;
1373 X = X.getOperand(0);
1374 }
1375 SDNode *SRLI = CurDAG->getMachineNode(
1376 SrliOpc, DL, VT, X,
1377 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1378 SDNode *SLLI = CurDAG->getMachineNode(
1379 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1380 CurDAG->getTargetConstant(Trailing, DL, VT));
1381 ReplaceNode(Node, SLLI);
1382 return;
1383 }
1384 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1385 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1386 OneUseOrZExtW && !IsCANDI) {
1387 SDNode *SRLIW = CurDAG->getMachineNode(
1388 RISCV::SRLIW, DL, VT, X,
1389 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1390 SDNode *SLLI = CurDAG->getMachineNode(
1391 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1392 CurDAG->getTargetConstant(Trailing, DL, VT));
1393 ReplaceNode(Node, SLLI);
1394 return;
1395 }
1396 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1397 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1398 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1399 SDNode *SRLI = CurDAG->getMachineNode(
1400 RISCV::SRLI, DL, VT, X,
1401 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1402 SDNode *SLLI_UW = CurDAG->getMachineNode(
1403 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1404 CurDAG->getTargetConstant(Trailing, DL, VT));
1405 ReplaceNode(Node, SLLI_UW);
1406 return;
1407 }
1408 }
1409
1410 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1411 // shifted mask with no leading zeros and c3 trailing zeros.
1412 if (LeftShift && isShiftedMask_64(C1)) {
1413 unsigned Leading = XLen - llvm::bit_width(C1);
1414 unsigned Trailing = llvm::countr_zero(C1);
1415 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1416 SDNode *SRLI = CurDAG->getMachineNode(
1417 RISCV::SRLI, DL, VT, X,
1418 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1419 SDNode *SLLI = CurDAG->getMachineNode(
1420 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1421 CurDAG->getTargetConstant(Trailing, DL, VT));
1422 ReplaceNode(Node, SLLI);
1423 return;
1424 }
1425 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1426 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1427 SDNode *SRLIW = CurDAG->getMachineNode(
1428 RISCV::SRLIW, DL, VT, X,
1429 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1430 SDNode *SLLI = CurDAG->getMachineNode(
1431 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1432 CurDAG->getTargetConstant(Trailing, DL, VT));
1433 ReplaceNode(Node, SLLI);
1434 return;
1435 }
1436
1437 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1438 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1439 Subtarget->hasStdExtZba()) {
1440 SDNode *SRLI = CurDAG->getMachineNode(
1441 RISCV::SRLI, DL, VT, X,
1442 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1443 SDNode *SLLI_UW = CurDAG->getMachineNode(
1444 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1445 CurDAG->getTargetConstant(Trailing, DL, VT));
1446 ReplaceNode(Node, SLLI_UW);
1447 return;
1448 }
1449 }
1450 }
1451
1452 const uint64_t C1 = N1C->getZExtValue();
1453
1454 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a mask
1455 // with c3 leading zeros and c2 is larger than c3.
1456 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1457 N0.hasOneUse()) {
1458 unsigned C2 = N0.getConstantOperandVal(1);
1459 unsigned XLen = Subtarget->getXLen();
1460 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1461
1462 SDValue X = N0.getOperand(0);
1463
1464 // Prefer SRAIW + ANDI when possible.
1465 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1466 X.getOpcode() == ISD::SHL &&
1467 isa<ConstantSDNode>(X.getOperand(1)) &&
1468 X.getConstantOperandVal(1) == 32;
1469 if (isMask_64(C1) && !Skip) {
1470 unsigned Leading = XLen - llvm::bit_width(C1);
1471 if (C2 > Leading) {
1472 SDNode *SRAI = CurDAG->getMachineNode(
1473 RISCV::SRAI, DL, VT, X,
1474 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1475 SDNode *SRLI = CurDAG->getMachineNode(
1476 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1477 CurDAG->getTargetConstant(Leading, DL, VT));
1478 ReplaceNode(Node, SRLI);
1479 return;
1480 }
1481 }
1482 }
1483
1484 // If C1 masks off the upper bits only (but can't be formed as an
1485 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1486 // available.
1487 // Transform (and x, C1)
1488 // -> (<bfextract> x, msb, lsb)
1489 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) {
1490 const unsigned Msb = llvm::bit_width(C1) - 1;
1491 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1492 return;
1493 }
1494
1495 if (tryShrinkShlLogicImm(Node))
1496 return;
1497
1498 break;
1499 }
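// For example, with XTHeadBb (and (srl x, 4), 0xFF) is selected as
// TH.EXTU x, 11, 4; without it, the same pattern falls back to the SLLI+SRLI
// form handled above.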
1500 case ISD::MUL: {
1501 // Special case for calculating (mul (and X, C2), C1) where the full product
1502 // fits in XLen bits. We can shift X left by the number of leading zeros in
1503 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1504 // product has XLen trailing zeros, putting it in the output of MULHU. This
1505 // can avoid materializing a constant in a register for C2.
1506
1507 // RHS should be a constant.
1508 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1509 if (!N1C || !N1C->hasOneUse())
1510 break;
1511
1512 // LHS should be an AND with constant.
1513 SDValue N0 = Node->getOperand(0);
1514 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1515 break;
1516
1517 uint64_t C2 = N0.getConstantOperandVal(1);
1518
1519 // Constant should be a mask.
1520 if (!isMask_64(C2))
1521 break;
1522
1523 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1524 // multiple users or the constant is a simm12. This prevents inserting a
1525 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1526 // make it more costly to materialize. Otherwise, using a SLLI might allow
1527 // it to be compressed.
1528 bool IsANDIOrZExt =
1529 isInt<12>(C2) ||
1530 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1531 // With XTHeadBb, we can use TH.EXTU.
1532 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1533 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1534 break;
1535 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1536 // the constant is a simm32.
1537 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1538 // With XTHeadBb, we can use TH.EXTU.
1539 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1540 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1541 break;
1542
1543 // We need to shift left the AND input and C1 by a total of XLen bits.
1544
1545 // How far left do we need to shift the AND input?
1546 unsigned XLen = Subtarget->getXLen();
1547 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1548
1549 // The constant gets shifted by the remaining amount unless that would
1550 // shift bits out.
1551 uint64_t C1 = N1C->getZExtValue();
1552 unsigned ConstantShift = XLen - LeadingZeros;
1553 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1554 break;
1555
1556 uint64_t ShiftedC1 = C1 << ConstantShift;
1557 // If this RV32, we need to sign extend the constant.
1558 if (XLen == 32)
1559 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1560
1561 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1562 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1563 SDNode *SLLI =
1564 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1565 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1566 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1567 SDValue(SLLI, 0), SDValue(Imm, 0));
1568 ReplaceNode(Node, MULHU);
1569 return;
1570 }
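// For example, on RV64 (mul (and x, 0xFFFFFF), 100) becomes
//   (MULHU (SLLI x, 40), 100 << 24)
// since the 128-bit product places the true result entirely in its upper 64
// bits, so the 0xFFFFFF mask is never materialized.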
1571 case ISD::LOAD: {
1572 if (tryIndexedLoad(Node))
1573 return;
1574
1575 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1576 // We match post-incrementing load here
1577 LoadSDNode *Load = cast<LoadSDNode>(Node);
1578 if (Load->getAddressingMode() != ISD::POST_INC)
1579 break;
1580
1581 SDValue Chain = Node->getOperand(0);
1582 SDValue Base = Node->getOperand(1);
1583 SDValue Offset = Node->getOperand(2);
1584
1585 bool Simm12 = false;
1586 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1587
1588 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1589 int ConstantVal = ConstantOffset->getSExtValue();
1590 Simm12 = isInt<12>(ConstantVal);
1591 if (Simm12)
1592 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1593 Offset.getValueType());
1594 }
1595
1596 unsigned Opcode = 0;
1597 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1598 case MVT::i8:
1599 if (Simm12 && SignExtend)
1600 Opcode = RISCV::CV_LB_ri_inc;
1601 else if (Simm12 && !SignExtend)
1602 Opcode = RISCV::CV_LBU_ri_inc;
1603 else if (!Simm12 && SignExtend)
1604 Opcode = RISCV::CV_LB_rr_inc;
1605 else
1606 Opcode = RISCV::CV_LBU_rr_inc;
1607 break;
1608 case MVT::i16:
1609 if (Simm12 && SignExtend)
1610 Opcode = RISCV::CV_LH_ri_inc;
1611 else if (Simm12 && !SignExtend)
1612 Opcode = RISCV::CV_LHU_ri_inc;
1613 else if (!Simm12 && SignExtend)
1614 Opcode = RISCV::CV_LH_rr_inc;
1615 else
1616 Opcode = RISCV::CV_LHU_rr_inc;
1617 break;
1618 case MVT::i32:
1619 if (Simm12)
1620 Opcode = RISCV::CV_LW_ri_inc;
1621 else
1622 Opcode = RISCV::CV_LW_rr_inc;
1623 break;
1624 default:
1625 break;
1626 }
1627 if (!Opcode)
1628 break;
1629
1630 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1631 Chain.getSimpleValueType(), Base,
1632 Offset, Chain));
1633 return;
1634 }
1635 break;
1636 }
1637 case ISD::INTRINSIC_WO_CHAIN: {
1638 unsigned IntNo = Node->getConstantOperandVal(0);
1639 switch (IntNo) {
1640 // By default we do not custom select any intrinsic.
1641 default:
1642 break;
1643 case Intrinsic::riscv_vmsgeu:
1644 case Intrinsic::riscv_vmsge: {
1645 SDValue Src1 = Node->getOperand(1);
1646 SDValue Src2 = Node->getOperand(2);
1647 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1648 bool IsCmpUnsignedZero = false;
1649 // Only custom select scalar second operand.
1650 if (Src2.getValueType() != XLenVT)
1651 break;
1652 // Small constants are handled with patterns.
1653 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1654 int64_t CVal = C->getSExtValue();
1655 if (CVal >= -15 && CVal <= 16) {
1656 if (!IsUnsigned || CVal != 0)
1657 break;
1658 IsCmpUnsignedZero = true;
1659 }
1660 }
1661 MVT Src1VT = Src1.getSimpleValueType();
1662 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1663 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1664 default:
1665 llvm_unreachable("Unexpected LMUL!");
1666#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1667 case RISCVII::VLMUL::lmulenum: \
1668 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1669 : RISCV::PseudoVMSLT_VX_##suffix; \
1670 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1671 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1672 break;
1673 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1674 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1675 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1676 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1677 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1678 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1679 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1680#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1681 }
1682 SDValue SEW = CurDAG->getTargetConstant(
1683 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1684 SDValue VL;
1685 selectVLOp(Node->getOperand(3), VL);
1686
1687 // If vmsgeu with 0 immediate, expand it to vmset.
1688 if (IsCmpUnsignedZero) {
1689 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1690 return;
1691 }
1692
1693 // Expand to
1694 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
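 // (vmsge{u}(va, x) is the complement of vmslt{u}(va, x), so negating the
 // vmslt result with vmnand yields the desired mask.)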
1695 SDValue Cmp = SDValue(
1696 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1697 0);
1698 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1699 {Cmp, Cmp, VL, SEW}));
1700 return;
1701 }
1702 case Intrinsic::riscv_vmsgeu_mask:
1703 case Intrinsic::riscv_vmsge_mask: {
1704 SDValue Src1 = Node->getOperand(2);
1705 SDValue Src2 = Node->getOperand(3);
1706 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1707 bool IsCmpUnsignedZero = false;
1708 // Only custom select scalar second operand.
1709 if (Src2.getValueType() != XLenVT)
1710 break;
1711 // Small constants are handled with patterns.
1712 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1713 int64_t CVal = C->getSExtValue();
1714 if (CVal >= -15 && CVal <= 16) {
1715 if (!IsUnsigned || CVal != 0)
1716 break;
1717 IsCmpUnsignedZero = true;
1718 }
1719 }
1720 MVT Src1VT = Src1.getSimpleValueType();
1721 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1722 VMOROpcode;
1723 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1724 default:
1725 llvm_unreachable("Unexpected LMUL!");
1726#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1727 case RISCVII::VLMUL::lmulenum: \
1728 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1729 : RISCV::PseudoVMSLT_VX_##suffix; \
1730 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1731 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1732 break;
1733 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1734 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1735 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1736 CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1737 CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1738 CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1739 CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1740#undef CASE_VMSLT_OPCODES
1741 }
1742 // Mask operations use the LMUL from the mask type.
1743 switch (RISCVTargetLowering::getLMUL(VT)) {
1744 default:
1745 llvm_unreachable("Unexpected LMUL!");
1746#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1747 case RISCVII::VLMUL::lmulenum: \
1748 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1749 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1750 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1751 break;
1752 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1753 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1754 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1755 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1756 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1757 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1758 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1759#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1760 }
1761 SDValue SEW = CurDAG->getTargetConstant(
1762 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1763 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1764 SDValue VL;
1765 selectVLOp(Node->getOperand(5), VL);
1766 SDValue MaskedOff = Node->getOperand(1);
1767 SDValue Mask = Node->getOperand(4);
1768
1769 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1770 if (IsCmpUnsignedZero) {
1771 // We don't need vmor if the MaskedOff and the Mask are the same
1772 // value.
1773 if (Mask == MaskedOff) {
1774 ReplaceUses(Node, Mask.getNode());
1775 return;
1776 }
1777 ReplaceNode(Node,
1778 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1779 {Mask, MaskedOff, VL, MaskSEW}));
1780 return;
1781 }
1782
1783 // If the MaskedOff value and the Mask are the same value use
1784 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1785 // This avoids needing to copy v0 to vd before starting the next sequence.
1786 if (Mask == MaskedOff) {
1787 SDValue Cmp = SDValue(
1788 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1789 0);
1790 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1791 {Mask, Cmp, VL, MaskSEW}));
1792 return;
1793 }
1794
1795 // Mask needs to be copied to V0.
1796 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1797 RISCV::V0, Mask, SDValue());
1798 SDValue Glue = Chain.getValue(1);
1799 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1800
1801 // Otherwise use
1802 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1803 // The result is mask undisturbed.
1804 // We use the same instructions to emulate mask agnostic behavior, because
1805 // the agnostic result can be either undisturbed or all 1.
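 // (In the masked vmslt, inactive lanes keep the MaskedOff value; the
 // following vmxor with v0 then flips only the active lanes, turning the
 // vmslt result into vmsge there while leaving inactive lanes untouched.)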
1806 SDValue Cmp = SDValue(
1807 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1808 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1809 0);
1810 // vmxor.mm vd, vd, v0 is used to update active value.
1811 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1812 {Cmp, Mask, VL, MaskSEW}));
1813 return;
1814 }
1815 case Intrinsic::riscv_vsetvli:
1816 case Intrinsic::riscv_vsetvlimax:
1817 return selectVSETVLI(Node);
1818 }
1819 break;
1820 }
1821 case ISD::INTRINSIC_W_CHAIN: {
1822 unsigned IntNo = Node->getConstantOperandVal(1);
1823 switch (IntNo) {
1824 // By default we do not custom select any intrinsic.
1825 default:
1826 break;
1827 case Intrinsic::riscv_vlseg2:
1828 case Intrinsic::riscv_vlseg3:
1829 case Intrinsic::riscv_vlseg4:
1830 case Intrinsic::riscv_vlseg5:
1831 case Intrinsic::riscv_vlseg6:
1832 case Intrinsic::riscv_vlseg7:
1833 case Intrinsic::riscv_vlseg8: {
1834 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1835 return;
1836 }
1837 case Intrinsic::riscv_vlseg2_mask:
1838 case Intrinsic::riscv_vlseg3_mask:
1839 case Intrinsic::riscv_vlseg4_mask:
1840 case Intrinsic::riscv_vlseg5_mask:
1841 case Intrinsic::riscv_vlseg6_mask:
1842 case Intrinsic::riscv_vlseg7_mask:
1843 case Intrinsic::riscv_vlseg8_mask: {
1844 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1845 return;
1846 }
1847 case Intrinsic::riscv_vlsseg2:
1848 case Intrinsic::riscv_vlsseg3:
1849 case Intrinsic::riscv_vlsseg4:
1850 case Intrinsic::riscv_vlsseg5:
1851 case Intrinsic::riscv_vlsseg6:
1852 case Intrinsic::riscv_vlsseg7:
1853 case Intrinsic::riscv_vlsseg8: {
1854 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1855 return;
1856 }
1857 case Intrinsic::riscv_vlsseg2_mask:
1858 case Intrinsic::riscv_vlsseg3_mask:
1859 case Intrinsic::riscv_vlsseg4_mask:
1860 case Intrinsic::riscv_vlsseg5_mask:
1861 case Intrinsic::riscv_vlsseg6_mask:
1862 case Intrinsic::riscv_vlsseg7_mask:
1863 case Intrinsic::riscv_vlsseg8_mask: {
1864 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1865 return;
1866 }
1867 case Intrinsic::riscv_vloxseg2:
1868 case Intrinsic::riscv_vloxseg3:
1869 case Intrinsic::riscv_vloxseg4:
1870 case Intrinsic::riscv_vloxseg5:
1871 case Intrinsic::riscv_vloxseg6:
1872 case Intrinsic::riscv_vloxseg7:
1873 case Intrinsic::riscv_vloxseg8:
1874 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1875 return;
1876 case Intrinsic::riscv_vluxseg2:
1877 case Intrinsic::riscv_vluxseg3:
1878 case Intrinsic::riscv_vluxseg4:
1879 case Intrinsic::riscv_vluxseg5:
1880 case Intrinsic::riscv_vluxseg6:
1881 case Intrinsic::riscv_vluxseg7:
1882 case Intrinsic::riscv_vluxseg8:
1883 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1884 return;
1885 case Intrinsic::riscv_vloxseg2_mask:
1886 case Intrinsic::riscv_vloxseg3_mask:
1887 case Intrinsic::riscv_vloxseg4_mask:
1888 case Intrinsic::riscv_vloxseg5_mask:
1889 case Intrinsic::riscv_vloxseg6_mask:
1890 case Intrinsic::riscv_vloxseg7_mask:
1891 case Intrinsic::riscv_vloxseg8_mask:
1892 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1893 return;
1894 case Intrinsic::riscv_vluxseg2_mask:
1895 case Intrinsic::riscv_vluxseg3_mask:
1896 case Intrinsic::riscv_vluxseg4_mask:
1897 case Intrinsic::riscv_vluxseg5_mask:
1898 case Intrinsic::riscv_vluxseg6_mask:
1899 case Intrinsic::riscv_vluxseg7_mask:
1900 case Intrinsic::riscv_vluxseg8_mask:
1901 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1902 return;
1903 case Intrinsic::riscv_vlseg8ff:
1904 case Intrinsic::riscv_vlseg7ff:
1905 case Intrinsic::riscv_vlseg6ff:
1906 case Intrinsic::riscv_vlseg5ff:
1907 case Intrinsic::riscv_vlseg4ff:
1908 case Intrinsic::riscv_vlseg3ff:
1909 case Intrinsic::riscv_vlseg2ff: {
1910 selectVLSEGFF(Node, /*IsMasked*/ false);
1911 return;
1912 }
1913 case Intrinsic::riscv_vlseg8ff_mask:
1914 case Intrinsic::riscv_vlseg7ff_mask:
1915 case Intrinsic::riscv_vlseg6ff_mask:
1916 case Intrinsic::riscv_vlseg5ff_mask:
1917 case Intrinsic::riscv_vlseg4ff_mask:
1918 case Intrinsic::riscv_vlseg3ff_mask:
1919 case Intrinsic::riscv_vlseg2ff_mask: {
1920 selectVLSEGFF(Node, /*IsMasked*/ true);
1921 return;
1922 }
1923 case Intrinsic::riscv_vloxei:
1924 case Intrinsic::riscv_vloxei_mask:
1925 case Intrinsic::riscv_vluxei:
1926 case Intrinsic::riscv_vluxei_mask: {
1927 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1928 IntNo == Intrinsic::riscv_vluxei_mask;
1929 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1930 IntNo == Intrinsic::riscv_vloxei_mask;
1931
1932 MVT VT = Node->getSimpleValueType(0);
1933 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1934
1935 unsigned CurOp = 2;
1936 SmallVector<SDValue, 8> Operands;
1937 Operands.push_back(Node->getOperand(CurOp++));
1938
1939 MVT IndexVT;
1940 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1941 /*IsStridedOrIndexed*/ true, Operands,
1942 /*IsLoad=*/true, &IndexVT);
1943
1944 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1945 "Element count mismatch");
1946
1947 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1948 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1949 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1950 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1951 report_fatal_error("The V extension does not support EEW=64 for index "
1952 "values when XLEN=32");
1953 }
1954 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1955 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1956 static_cast<unsigned>(IndexLMUL));
1957 MachineSDNode *Load =
1958 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1959
1960 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1961 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1962
1963 ReplaceNode(Node, Load);
1964 return;
1965 }
1966 case Intrinsic::riscv_vlm:
1967 case Intrinsic::riscv_vle:
1968 case Intrinsic::riscv_vle_mask:
1969 case Intrinsic::riscv_vlse:
1970 case Intrinsic::riscv_vlse_mask: {
1971 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1972 IntNo == Intrinsic::riscv_vlse_mask;
1973 bool IsStrided =
1974 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1975
1976 MVT VT = Node->getSimpleValueType(0);
1977 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1978
1979 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1980 // operand at the IR level. In pseudos, it has both a policy and a
1981 // passthru operand. The passthru operand is needed to track the
1982 // "tail undefined" state, and the policy is there just for
1983 // consistency - it will always be "don't care" for the
1984 // unmasked form.
1985 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1986 unsigned CurOp = 2;
1987 SmallVector<SDValue, 8> Operands;
1988 if (HasPassthruOperand)
1989 Operands.push_back(Node->getOperand(CurOp++));
1990 else {
1991 // We eagerly lower to implicit_def (instead of undef), as we
1992 // otherwise fail to select nodes such as: nxv1i1 = undef
1993 SDNode *Passthru =
1994 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1995 Operands.push_back(SDValue(Passthru, 0));
1996 }
1997 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1998 Operands, /*IsLoad=*/true);
1999
2000 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2001 const RISCV::VLEPseudo *P =
2002 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2003 static_cast<unsigned>(LMUL));
2004 MachineSDNode *Load =
2005 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2006
2007 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2008 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2009
2010 ReplaceNode(Node, Load);
2011 return;
2012 }
2013 case Intrinsic::riscv_vleff:
2014 case Intrinsic::riscv_vleff_mask: {
2015 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2016
2017 MVT VT = Node->getSimpleValueType(0);
2018 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2019
2020 unsigned CurOp = 2;
2022 Operands.push_back(Node->getOperand(CurOp++));
2023 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2024 /*IsStridedOrIndexed*/ false, Operands,
2025 /*IsLoad=*/true);
2026
2027 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2028 const RISCV::VLEPseudo *P =
2029 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2030 Log2SEW, static_cast<unsigned>(LMUL));
2031 MachineSDNode *Load = CurDAG->getMachineNode(
2032 P->Pseudo, DL, Node->getVTList(), Operands);
2033 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2034 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2035
2036 ReplaceNode(Node, Load);
2037 return;
2038 }
2039 }
2040 break;
2041 }
2042 case ISD::INTRINSIC_VOID: {
2043 unsigned IntNo = Node->getConstantOperandVal(1);
2044 switch (IntNo) {
2045 case Intrinsic::riscv_vsseg2:
2046 case Intrinsic::riscv_vsseg3:
2047 case Intrinsic::riscv_vsseg4:
2048 case Intrinsic::riscv_vsseg5:
2049 case Intrinsic::riscv_vsseg6:
2050 case Intrinsic::riscv_vsseg7:
2051 case Intrinsic::riscv_vsseg8: {
2052 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
2053 return;
2054 }
2055 case Intrinsic::riscv_vsseg2_mask:
2056 case Intrinsic::riscv_vsseg3_mask:
2057 case Intrinsic::riscv_vsseg4_mask:
2058 case Intrinsic::riscv_vsseg5_mask:
2059 case Intrinsic::riscv_vsseg6_mask:
2060 case Intrinsic::riscv_vsseg7_mask:
2061 case Intrinsic::riscv_vsseg8_mask: {
2062 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
2063 return;
2064 }
2065 case Intrinsic::riscv_vssseg2:
2066 case Intrinsic::riscv_vssseg3:
2067 case Intrinsic::riscv_vssseg4:
2068 case Intrinsic::riscv_vssseg5:
2069 case Intrinsic::riscv_vssseg6:
2070 case Intrinsic::riscv_vssseg7:
2071 case Intrinsic::riscv_vssseg8: {
2072 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
2073 return;
2074 }
2075 case Intrinsic::riscv_vssseg2_mask:
2076 case Intrinsic::riscv_vssseg3_mask:
2077 case Intrinsic::riscv_vssseg4_mask:
2078 case Intrinsic::riscv_vssseg5_mask:
2079 case Intrinsic::riscv_vssseg6_mask:
2080 case Intrinsic::riscv_vssseg7_mask:
2081 case Intrinsic::riscv_vssseg8_mask: {
2082 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
2083 return;
2084 }
2085 case Intrinsic::riscv_vsoxseg2:
2086 case Intrinsic::riscv_vsoxseg3:
2087 case Intrinsic::riscv_vsoxseg4:
2088 case Intrinsic::riscv_vsoxseg5:
2089 case Intrinsic::riscv_vsoxseg6:
2090 case Intrinsic::riscv_vsoxseg7:
2091 case Intrinsic::riscv_vsoxseg8:
2092 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
2093 return;
2094 case Intrinsic::riscv_vsuxseg2:
2095 case Intrinsic::riscv_vsuxseg3:
2096 case Intrinsic::riscv_vsuxseg4:
2097 case Intrinsic::riscv_vsuxseg5:
2098 case Intrinsic::riscv_vsuxseg6:
2099 case Intrinsic::riscv_vsuxseg7:
2100 case Intrinsic::riscv_vsuxseg8:
2101 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
2102 return;
2103 case Intrinsic::riscv_vsoxseg2_mask:
2104 case Intrinsic::riscv_vsoxseg3_mask:
2105 case Intrinsic::riscv_vsoxseg4_mask:
2106 case Intrinsic::riscv_vsoxseg5_mask:
2107 case Intrinsic::riscv_vsoxseg6_mask:
2108 case Intrinsic::riscv_vsoxseg7_mask:
2109 case Intrinsic::riscv_vsoxseg8_mask:
2110 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
2111 return;
2112 case Intrinsic::riscv_vsuxseg2_mask:
2113 case Intrinsic::riscv_vsuxseg3_mask:
2114 case Intrinsic::riscv_vsuxseg4_mask:
2115 case Intrinsic::riscv_vsuxseg5_mask:
2116 case Intrinsic::riscv_vsuxseg6_mask:
2117 case Intrinsic::riscv_vsuxseg7_mask:
2118 case Intrinsic::riscv_vsuxseg8_mask:
2119 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
2120 return;
2121 case Intrinsic::riscv_vsoxei:
2122 case Intrinsic::riscv_vsoxei_mask:
2123 case Intrinsic::riscv_vsuxei:
2124 case Intrinsic::riscv_vsuxei_mask: {
2125 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2126 IntNo == Intrinsic::riscv_vsuxei_mask;
2127 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2128 IntNo == Intrinsic::riscv_vsoxei_mask;
2129
2130 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2131 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2132
2133 unsigned CurOp = 2;
2134 SmallVector<SDValue, 8> Operands;
2135 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2136
2137 MVT IndexVT;
2138 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2139 /*IsStridedOrIndexed*/ true, Operands,
2140 /*IsLoad=*/false, &IndexVT);
2141
2142 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2143 "Element count mismatch");
2144
2145 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2146 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2147 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2148 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2149 report_fatal_error("The V extension does not support EEW=64 for index "
2150 "values when XLEN=32");
2151 }
2152 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2153 IsMasked, IsOrdered, IndexLog2EEW,
2154 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2155 MachineSDNode *Store =
2156 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2157
2158 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2159 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2160
2161 ReplaceNode(Node, Store);
2162 return;
2163 }
2164 case Intrinsic::riscv_vsm:
2165 case Intrinsic::riscv_vse:
2166 case Intrinsic::riscv_vse_mask:
2167 case Intrinsic::riscv_vsse:
2168 case Intrinsic::riscv_vsse_mask: {
2169 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2170 IntNo == Intrinsic::riscv_vsse_mask;
2171 bool IsStrided =
2172 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2173
2174 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2175 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2176
2177 unsigned CurOp = 2;
2178 SmallVector<SDValue, 8> Operands;
2179 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2180
2181 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2182 Operands);
2183
2184 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2185 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2186 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2187 MachineSDNode *Store =
2188 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2189 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2190 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2191
2192 ReplaceNode(Node, Store);
2193 return;
2194 }
2195 case Intrinsic::riscv_sf_vc_x_se:
2196 case Intrinsic::riscv_sf_vc_i_se:
2197 selectSF_VC_X_SE(Node);
2198 return;
2199 }
2200 break;
2201 }
2202 case ISD::BITCAST: {
2203 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2204 // Just drop bitcasts between vectors if both are fixed or both are
2205 // scalable.
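 // (For instance, nxv4i32 and nxv8i16 both occupy an LMUL=2 register group,
 // so such a bitcast needs no instruction; illustrative example.)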
2206 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2207 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2208 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2209 CurDAG->RemoveDeadNode(Node);
2210 return;
2211 }
2212 break;
2213 }
2214 case ISD::INSERT_SUBVECTOR: {
2215 SDValue V = Node->getOperand(0);
2216 SDValue SubV = Node->getOperand(1);
2217 SDLoc DL(SubV);
2218 auto Idx = Node->getConstantOperandVal(2);
2219 MVT SubVecVT = SubV.getSimpleValueType();
2220
2221 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2222 MVT SubVecContainerVT = SubVecVT;
2223 // Establish the correct scalable-vector types for any fixed-length type.
2224 if (SubVecVT.isFixedLengthVector()) {
2225 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2226 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2227 [[maybe_unused]] bool ExactlyVecRegSized =
2228 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2229 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2230 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2231 .getKnownMinValue()));
2232 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2233 }
2234 MVT ContainerVT = VT;
2235 if (VT.isFixedLengthVector())
2236 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2237
2238 const auto *TRI = Subtarget->getRegisterInfo();
2239 unsigned SubRegIdx;
2240 std::tie(SubRegIdx, Idx) =
2241 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2242 ContainerVT, SubVecContainerVT, Idx, TRI);
2243
2244 // If the Idx hasn't been completely eliminated then this is a subvector
2245 // insert which doesn't naturally align to a vector register. These must
2246 // be handled using instructions to manipulate the vector registers.
2247 if (Idx != 0)
2248 break;
2249
2250 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2251 [[maybe_unused]] bool IsSubVecPartReg =
2252 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2253 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2254 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2255 assert((!IsSubVecPartReg || V.isUndef()) &&
2256 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2257 "the subvector is smaller than a full-sized register");
2258
2259 // If we haven't set a SubRegIdx, then we must be going between
2260 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2261 if (SubRegIdx == RISCV::NoSubRegister) {
2262 unsigned InRegClassID =
2263 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2264 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2265 InRegClassID &&
2266 "Unexpected subvector extraction");
2267 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2268 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2269 DL, VT, SubV, RC);
2270 ReplaceNode(Node, NewNode);
2271 return;
2272 }
2273
2274 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2275 ReplaceNode(Node, Insert.getNode());
2276 return;
2277 }
2278 case ISD::EXTRACT_SUBVECTOR: {
2279 SDValue V = Node->getOperand(0);
2280 auto Idx = Node->getConstantOperandVal(1);
2281 MVT InVT = V.getSimpleValueType();
2282 SDLoc DL(V);
2283
2284 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2285 MVT SubVecContainerVT = VT;
2286 // Establish the correct scalable-vector types for any fixed-length type.
2287 if (VT.isFixedLengthVector()) {
2288 assert(Idx == 0);
2289 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2290 }
2291 if (InVT.isFixedLengthVector())
2292 InVT = TLI.getContainerForFixedLengthVector(InVT);
2293
2294 const auto *TRI = Subtarget->getRegisterInfo();
2295 unsigned SubRegIdx;
2296 std::tie(SubRegIdx, Idx) =
2297 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2298 InVT, SubVecContainerVT, Idx, TRI);
2299
2300 // If the Idx hasn't been completely eliminated then this is a subvector
2301 // extract which doesn't naturally align to a vector register. These must
2302 // be handled using instructions to manipulate the vector registers.
2303 if (Idx != 0)
2304 break;
2305
2306 // If we haven't set a SubRegIdx, then we must be going between
2307 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2308 if (SubRegIdx == RISCV::NoSubRegister) {
2309 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2310 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2311 InRegClassID &&
2312 "Unexpected subvector extraction");
2313 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2314 SDNode *NewNode =
2315 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2316 ReplaceNode(Node, NewNode);
2317 return;
2318 }
2319
2320 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2321 ReplaceNode(Node, Extract.getNode());
2322 return;
2323 }
2324 case RISCVISD::VMV_S_X_VL:
2325 case RISCVISD::VFMV_S_F_VL:
2326 case RISCVISD::VMV_V_X_VL:
2327 case RISCVISD::VFMV_V_F_VL: {
2328 // Try to match splat of a scalar load to a strided load with stride of x0.
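 // e.g. a splat whose scalar comes from a 32-bit load can be selected as
 // vlse32.v vd, (base), zero - a zero-stride load that reads the scalar once
 // and broadcasts it (illustrative; guarded below by the VL and
 // hasOptimizedZeroStrideLoad checks).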
2329 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2330 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2331 if (!Node->getOperand(0).isUndef())
2332 break;
2333 SDValue Src = Node->getOperand(1);
2334 auto *Ld = dyn_cast<LoadSDNode>(Src);
2335 // Can't fold an indexed load (load update node): its second output,
2336 // the updated address, is used, so the node can't be removed.
2337 if (!Ld || Ld->isIndexed())
2338 break;
2339 EVT MemVT = Ld->getMemoryVT();
2340 // The memory VT should be the same size as the element type.
2341 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2342 break;
2343 if (!IsProfitableToFold(Src, Node, Node) ||
2344 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2345 break;
2346
2347 SDValue VL;
2348 if (IsScalarMove) {
2349 // We could deal with more VL if we update the VSETVLI insert pass to
2350 // avoid introducing more VSETVLI.
2351 if (!isOneConstant(Node->getOperand(2)))
2352 break;
2353 selectVLOp(Node->getOperand(2), VL);
2354 } else
2355 selectVLOp(Node->getOperand(2), VL);
2356
2357 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2358 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2359
2360 // If VL=1, then we don't need to do a strided load and can just do a
2361 // regular load.
2362 bool IsStrided = !isOneConstant(VL);
2363
2364 // Only do a strided load if we have optimized zero-stride vector load.
2365 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2366 break;
2367
2368 SmallVector<SDValue> Operands = {
2369 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2370 Ld->getBasePtr()};
2371 if (IsStrided)
2372 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2374 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2375 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2376
2377 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2378 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2379 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2380 Log2SEW, static_cast<unsigned>(LMUL));
2381 MachineSDNode *Load =
2382 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2383 // Update the chain.
2384 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2385 // Record the mem-refs
2386 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2387 // Replace the splat with the vlse.
2388 ReplaceNode(Node, Load);
2389 return;
2390 }
2391 case ISD::PREFETCH:
2392 unsigned Locality = Node->getConstantOperandVal(3);
2393 if (Locality > 2)
2394 break;
2395
2396 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2397 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2399
2400 int NontemporalLevel = 0;
2401 switch (Locality) {
2402 case 0:
2403 NontemporalLevel = 3; // NTL.ALL
2404 break;
2405 case 1:
2406 NontemporalLevel = 1; // NTL.PALL
2407 break;
2408 case 2:
2409 NontemporalLevel = 0; // NTL.P1
2410 break;
2411 default:
2412 llvm_unreachable("unexpected locality value.");
2413 }
2414
2415 if (NontemporalLevel & 0b1)
2416 MMO->setFlags(MONontemporalBit0);
2417 if (NontemporalLevel & 0b10)
2418 MMO->setFlags(MONontemporalBit1);
2419 }
2420 break;
2421 }
2422
2423 // Select the default instruction.
2424 SelectCode(Node);
2425}
2426
2427 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2428 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2429 std::vector<SDValue> &OutOps) {
2430 // Always produce a register and immediate operand, as expected by
2431 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2432 switch (ConstraintID) {
2433 case InlineAsm::ConstraintCode::o:
2434 case InlineAsm::ConstraintCode::m: {
2435 SDValue Op0, Op1;
2436 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2437 assert(Found && "SelectAddrRegImm should always succeed");
2438 OutOps.push_back(Op0);
2439 OutOps.push_back(Op1);
2440 return false;
2441 }
2442 case InlineAsm::ConstraintCode::A:
2443 OutOps.push_back(Op);
2444 OutOps.push_back(
2445 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2446 return false;
2447 default:
2448 report_fatal_error("Unexpected asm memory constraint " +
2449 InlineAsm::getMemConstraintName(ConstraintID));
2450 }
2451
2452 return true;
2453}
2454
2455 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2456 SDValue &Offset) {
2457 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2458 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2459 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2460 return true;
2461 }
2462
2463 return false;
2464}
2465
2466// Select a frame index and an optional immediate offset from an ADD or OR.
2467 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2468 SDValue &Offset) {
2469 if (SelectAddrFrameIndex(Addr, Base, Offset))
2470 return true;
2471
2472 if (!CurDAG->isBaseWithConstantOffset(Addr))
2473 return false;
2474
2475 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2476 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2477 if (isInt<12>(CVal)) {
2478 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2479 Subtarget->getXLenVT());
2481 Subtarget->getXLenVT());
2482 return true;
2483 }
2484 }
2485
2486 return false;
2487}
2488
2489// Fold constant addresses.
2490static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2491 const MVT VT, const RISCVSubtarget *Subtarget,
2492 SDValue Addr, SDValue &Base, SDValue &Offset,
2493 bool IsPrefetch = false) {
2494 if (!isa<ConstantSDNode>(Addr))
2495 return false;
2496
2497 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2498
2499 // If the constant is a simm12, we can fold the whole constant and use X0 as
2500 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2501 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
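 // e.g. on RV64, Addr = 0x12345678 splits into Lo12 = 0x678 and Hi20 =
 // 0x12345, so the access becomes an LUI of 0x12345 plus a 0x678 memory-op
 // offset (illustrative).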
2502 int64_t Lo12 = SignExtend64<12>(CVal);
2503 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2504 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2505 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2506 return false;
2507
2508 if (Hi) {
2509 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2510 Base = SDValue(
2511 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2512 CurDAG->getTargetConstant(Hi20, DL, VT)),
2513 0);
2514 } else {
2515 Base = CurDAG->getRegister(RISCV::X0, VT);
2516 }
2517 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2518 return true;
2519 }
2520
2521 // Ask how constant materialization would handle this constant.
2522 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2523
2524 // If the last instruction would be an ADDI, we can fold its immediate and
2525 // emit the rest of the sequence as the base.
2526 if (Seq.back().getOpcode() != RISCV::ADDI)
2527 return false;
2528 Lo12 = Seq.back().getImm();
2529 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2530 return false;
2531
2532 // Drop the last instruction.
2533 Seq.pop_back();
2534 assert(!Seq.empty() && "Expected more instructions in sequence");
2535
2536 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2537 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2538 return true;
2539}
2540
2541// Is this ADD instruction only used as the base pointer of scalar loads and
2542// stores?
2543 static bool isWorthFoldingAdd(SDValue Add) {
2544 for (auto *Use : Add->uses()) {
2545 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2546 Use->getOpcode() != ISD::ATOMIC_LOAD &&
2547 Use->getOpcode() != ISD::ATOMIC_STORE)
2548 return false;
2549 EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2550 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2551 VT != MVT::f64)
2552 return false;
2553 // Don't allow stores of the value. It must be used as the address.
2554 if (Use->getOpcode() == ISD::STORE &&
2555 cast<StoreSDNode>(Use)->getValue() == Add)
2556 return false;
2557 if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2558 cast<AtomicSDNode>(Use)->getVal() == Add)
2559 return false;
2560 }
2561
2562 return true;
2563}
2564
2565 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2566 unsigned MaxShiftAmount,
2567 SDValue &Base, SDValue &Index,
2568 SDValue &Scale) {
2569 EVT VT = Addr.getSimpleValueType();
2570 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2571 SDValue &Shift) {
2572 uint64_t ShiftAmt = 0;
2573 Index = N;
2574
2575 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2576 // Only match shifts by a value in range [0, MaxShiftAmount].
2577 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2578 Index = N.getOperand(0);
2579 ShiftAmt = N.getConstantOperandVal(1);
2580 }
2581 }
2582
2583 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2584 return ShiftAmt != 0;
2585 };
2586
2587 if (Addr.getOpcode() == ISD::ADD) {
2588 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2589 SDValue AddrB = Addr.getOperand(0);
2590 if (AddrB.getOpcode() == ISD::ADD &&
2591 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2592 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2593 isInt<12>(C1->getSExtValue())) {
2594 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2595 SDValue C1Val =
2596 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2597 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2598 AddrB.getOperand(1), C1Val),
2599 0);
2600 return true;
2601 }
2602 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2603 Base = Addr.getOperand(1);
2604 return true;
2605 } else {
2606 UnwrapShl(Addr.getOperand(1), Index, Scale);
2607 Base = Addr.getOperand(0);
2608 return true;
2609 }
2610 } else if (UnwrapShl(Addr, Index, Scale)) {
2611 EVT VT = Addr.getValueType();
2612 Base = CurDAG->getRegister(RISCV::X0, VT);
2613 return true;
2614 }
2615
2616 return false;
2617}
2618
2619 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2620 SDValue &Offset, bool IsINX) {
2621 if (SelectAddrFrameIndex(Addr, Base, Offset))
2622 return true;
2623
2624 SDLoc DL(Addr);
2625 MVT VT = Addr.getSimpleValueType();
2626
2627 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2628 Base = Addr.getOperand(0);
2629 Offset = Addr.getOperand(1);
2630 return true;
2631 }
2632
2633 int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2634 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2635 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2636 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2637 Base = Addr.getOperand(0);
2638 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2639 SDValue LoOperand = Base.getOperand(1);
2640 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2641 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2642 // (its low part, really), then we can rely on the alignment of that
2643 // variable to provide a margin of safety before low part can overflow
2644 // the 12 bits of the load/store offset. Check if CVal falls within
2645 // that margin; if so (low part + CVal) can't overflow.
2646 const DataLayout &DL = CurDAG->getDataLayout();
2647 Align Alignment = commonAlignment(
2648 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2649 if (CVal == 0 || Alignment > CVal) {
2650 int64_t CombinedOffset = CVal + GA->getOffset();
2651 Base = Base.getOperand(0);
2652 Offset = CurDAG->getTargetGlobalAddress(
2653 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2654 CombinedOffset, GA->getTargetFlags());
2655 return true;
2656 }
2657 }
2658 }
2659
2660 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2661 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2662 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2663 return true;
2664 }
2665 }
2666
2667 // Handle ADD with large immediates.
2668 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2669 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2670 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2671 "simm12 not already handled?");
2672
2673 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2674 // an ADDI for part of the offset and fold the rest into the load/store.
2675 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
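 // e.g. an offset of 3000 is handled as an ADDI of 2047 applied to the base,
 // with the remaining 953 folded into the load/store immediate (illustrative).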
2676 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2677 int64_t Adj = CVal < 0 ? -2048 : 2047;
2678 Base = SDValue(
2679 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2680 CurDAG->getTargetConstant(Adj, DL, VT)),
2681 0);
2682 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2683 return true;
2684 }
2685
2686 // For larger immediates, we might be able to save one instruction from
2687 // constant materialization by folding the Lo12 bits of the immediate into
2688 // the address. We should only do this if the ADD is only used by loads and
2689 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2690 // separately with the full materialized immediate creating extra
2691 // instructions.
2692 if (isWorthFoldingAdd(Addr) &&
2693 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2694 Offset)) {
2695 // Insert an ADD instruction with the materialized Hi52 bits.
2696 Base = SDValue(
2697 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2698 0);
2699 return true;
2700 }
2701 }
2702
2703 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2704 return true;
2705
2706 Base = Addr;
2707 Offset = CurDAG->getTargetConstant(0, DL, VT);
2708 return true;
2709}
2710
2711/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2712 /// Offset should be all zeros.
2714 SDValue &Offset) {
2716 return true;
2717
2718 SDLoc DL(Addr);
2719 MVT VT = Addr.getSimpleValueType();
2720
2721 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2722 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2723 if (isInt<12>(CVal)) {
2724 Base = Addr.getOperand(0);
2725
2726 // Early-out if not a valid offset.
2727 if ((CVal & 0b11111) != 0) {
2728 Base = Addr;
2729 Offset = CurDAG->getTargetConstant(0, DL, VT);
2730 return true;
2731 }
2732
2733 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2734 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2735 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2736 return true;
2737 }
2738 }
2739
2740 // Handle ADD with large immediates.
2741 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2742 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2743 assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
2744 "simm12 not already handled?");
2745
2746 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2747 // one instruction by folding an adjustment (-2048 or 2016) into the address.
2748 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2749 int64_t Adj = CVal < 0 ? -2048 : 2016;
2750 int64_t AdjustedOffset = CVal - Adj;
2752 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2753 CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
2754 0);
2755 Offset = CurDAG->getTargetConstant(Adj, DL, VT);
2756 return true;
2757 }
2758
2759 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2760 Offset, true)) {
2761 // Insert an ADD instruction with the materialized Hi52 bits.
2762 Base = SDValue(
2763 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2764 0);
2765 return true;
2766 }
2767 }
2768
2769 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2770 return true;
2771
2772 Base = Addr;
2773 Offset = CurDAG->getTargetConstant(0, DL, VT);
2774 return true;
2775}
2776
2778 SDValue &Offset) {
2779 if (Addr.getOpcode() != ISD::ADD)
2780 return false;
2781
2782 if (isa<ConstantSDNode>(Addr.getOperand(1)))
2783 return false;
2784
2785 Base = Addr.getOperand(1);
2786 Offset = Addr.getOperand(0);
2787 return true;
2788}
2789
2790 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2791 SDValue &ShAmt) {
2792 ShAmt = N;
2793
2794 // Peek through zext.
2795 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2796 ShAmt = ShAmt.getOperand(0);
2797
2798 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2799 // amount. If there is an AND on the shift amount, we can bypass it if it
2800 // doesn't affect any of those bits.
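 // e.g. (srl X, (and Y, 63)) on RV64 can be selected as srl X, Y, since the
 // hardware already uses only the low 6 bits of the shift amount
 // (illustrative).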
2801 if (ShAmt.getOpcode() == ISD::AND &&
2802 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2803 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2804
2805 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2806 // mask that covers the bits needed to represent all shift amounts.
2807 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2808 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2809
2810 if (ShMask.isSubsetOf(AndMask)) {
2811 ShAmt = ShAmt.getOperand(0);
2812 } else {
2813 // SimplifyDemandedBits may have optimized the mask so try restoring any
2814 // bits that are known zero.
2815 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2816 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2817 return true;
2818 ShAmt = ShAmt.getOperand(0);
2819 }
2820 }
2821
2822 if (ShAmt.getOpcode() == ISD::ADD &&
2823 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2824 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2825 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2826 // to avoid the ADD.
2827 if (Imm != 0 && Imm % ShiftWidth == 0) {
2828 ShAmt = ShAmt.getOperand(0);
2829 return true;
2830 }
2831 } else if (ShAmt.getOpcode() == ISD::SUB &&
2832 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2833 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2834 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2835 // generate a NEG instead of a SUB of a constant.
2836 if (Imm != 0 && Imm % ShiftWidth == 0) {
2837 SDLoc DL(ShAmt);
2838 EVT VT = ShAmt.getValueType();
2839 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2840 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2841 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2842 ShAmt.getOperand(1));
2843 ShAmt = SDValue(Neg, 0);
2844 return true;
2845 }
2846 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2847 // to generate a NOT instead of a SUB of a constant.
2848 if (Imm % ShiftWidth == ShiftWidth - 1) {
2849 SDLoc DL(ShAmt);
2850 EVT VT = ShAmt.getValueType();
2851 MachineSDNode *Not =
2852 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2853 CurDAG->getTargetConstant(-1, DL, VT));
2854 ShAmt = SDValue(Not, 0);
2855 return true;
2856 }
2857 }
2858
2859 return true;
2860}
2861
2862/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2863/// check for equality with 0. This function emits instructions that convert the
2864/// seteq/setne into something that can be compared with 0.
2865/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2866/// ISD::SETNE).
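/// For example, (seteq X, 7) is converted to (ADDI X, -7), which is zero
/// exactly when X == 7 and can then be compared against zero by the consumer
/// (illustrative).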
2867 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2868 SDValue &Val) {
2869 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2870 "Unexpected condition code!");
2871
2872 // We're looking for a setcc.
2873 if (N->getOpcode() != ISD::SETCC)
2874 return false;
2875
2876 // Must be an equality comparison.
2877 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2878 if (CCVal != ExpectedCCVal)
2879 return false;
2880
2881 SDValue LHS = N->getOperand(0);
2882 SDValue RHS = N->getOperand(1);
2883
2884 if (!LHS.getValueType().isScalarInteger())
2885 return false;
2886
2887 // If the RHS side is 0, we don't need any extra instructions, return the LHS.
2888 if (isNullConstant(RHS)) {
2889 Val = LHS;
2890 return true;
2891 }
2892
2893 SDLoc DL(N);
2894
2895 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2896 int64_t CVal = C->getSExtValue();
2897 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2898 // non-zero otherwise.
2899 if (CVal == -2048) {
2900 Val =
2901 SDValue(CurDAG->getMachineNode(
2902 RISCV::XORI, DL, N->getValueType(0), LHS,
2903 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2904 0);
2905 return true;
2906 }
2907 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2908 // LHS is equal to the RHS and non-zero otherwise.
2909 if (isInt<12>(CVal) || CVal == 2048) {
2910 Val =
2911 SDValue(CurDAG->getMachineNode(
2912 RISCV::ADDI, DL, N->getValueType(0), LHS,
2913 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2914 0);
2915 return true;
2916 }
2917 }
2918
2919 // If nothing else we can XOR the LHS and RHS to produce zero if they are
2920 // equal and a non-zero value if they aren't.
2921 Val = SDValue(
2922 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2923 return true;
2924}
2925
2927 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2928 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2929 Val = N.getOperand(0);
2930 return true;
2931 }
2932
2933 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2934 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2935 return N;
2936
2937 SDValue N0 = N.getOperand(0);
2938 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2939 N.getConstantOperandVal(1) == ShiftAmt &&
2940 N0.getConstantOperandVal(1) == ShiftAmt)
2941 return N0.getOperand(0);
2942
2943 return N;
2944 };
2945
2946 MVT VT = N.getSimpleValueType();
2947 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2948 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2949 return true;
2950 }
2951
2952 return false;
2953}
2954
2956 if (N.getOpcode() == ISD::AND) {
2957 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2958 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2959 Val = N.getOperand(0);
2960 return true;
2961 }
2962 }
2963 MVT VT = N.getSimpleValueType();
2964 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2965 if (CurDAG->MaskedValueIsZero(N, Mask)) {
2966 Val = N;
2967 return true;
2968 }
2969
2970 return false;
2971}
2972
2973/// Look for various patterns that can be done with a SHL that can be folded
2974/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2975/// SHXADD we are trying to match.
2977 SDValue &Val) {
2978 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2979 SDValue N0 = N.getOperand(0);
2980
2981 bool LeftShift = N0.getOpcode() == ISD::SHL;
2982 if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2983 isa<ConstantSDNode>(N0.getOperand(1))) {
2984 uint64_t Mask = N.getConstantOperandVal(1);
2985 unsigned C2 = N0.getConstantOperandVal(1);
2986
2987 unsigned XLen = Subtarget->getXLen();
2988 if (LeftShift)
2989 Mask &= maskTrailingZeros<uint64_t>(C2);
2990 else
2991 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2992
2993 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2994 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2995 // followed by a SHXADD with c3 for the X amount.
2996 if (isShiftedMask_64(Mask)) {
2997 unsigned Leading = XLen - llvm::bit_width(Mask);
2998 unsigned Trailing = llvm::countr_zero(Mask);
2999 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
3000 SDLoc DL(N);
3001 EVT VT = N.getValueType();
3002 Val = SDValue(CurDAG->getMachineNode(
3003 RISCV::SRLI, DL, VT, N0.getOperand(0),
3004 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
3005 0);
3006 return true;
3007 }
3008 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3009 // leading zeros and c3 trailing zeros. We can use an SRLI by C3
3010 // followed by a SHXADD using c3 for the X amount.
3011 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
3012 SDLoc DL(N);
3013 EVT VT = N.getValueType();
3014 Val = SDValue(
3015 CurDAG->getMachineNode(
3016 RISCV::SRLI, DL, VT, N0.getOperand(0),
3017 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
3018 0);
3019 return true;
3020 }
3021 }
3022 }
3023 }
3024
3025 bool LeftShift = N.getOpcode() == ISD::SHL;
3026 if ((LeftShift || N.getOpcode() == ISD::SRL) &&
3027 isa<ConstantSDNode>(N.getOperand(1))) {
3028 SDValue N0 = N.getOperand(0);
3029 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3030 isa<ConstantSDNode>(N0.getOperand(1))) {
3031 uint64_t Mask = N0.getConstantOperandVal(1);
3032 if (isShiftedMask_64(Mask)) {
3033 unsigned C1 = N.getConstantOperandVal(1);
3034 unsigned XLen = Subtarget->getXLen();
3035 unsigned Leading = XLen - llvm::bit_width(Mask);
3036 unsigned Trailing = llvm::countr_zero(Mask);
3037 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3038 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3039 if (LeftShift && Leading == 32 && Trailing > 0 &&
3040 (Trailing + C1) == ShAmt) {
3041 SDLoc DL(N);
3042 EVT VT = N.getValueType();
3043 Val = SDValue(CurDAG->getMachineNode(
3044 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3045 CurDAG->getTargetConstant(Trailing, DL, VT)),
3046 0);
3047 return true;
3048 }
3049 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3050 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3051 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3052 (Trailing - C1) == ShAmt) {
3053 SDLoc DL(N);
3054 EVT VT = N.getValueType();
3055 Val = SDValue(CurDAG->getMachineNode(
3056 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3057 CurDAG->getTargetConstant(Trailing, DL, VT)),
3058 0);
3059 return true;
3060 }
3061 }
3062 }
3063 }
3064
3065 return false;
3066}
3067
3068/// Look for various patterns that can be done with a SHL that can be folded
3069/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3070/// SHXADD_UW we are trying to match.
3072 SDValue &Val) {
3073 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3074 N.hasOneUse()) {
3075 SDValue N0 = N.getOperand(0);
3076 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3077 N0.hasOneUse()) {
3078 uint64_t Mask = N.getConstantOperandVal(1);
3079 unsigned C2 = N0.getConstantOperandVal(1);
3080
3081 Mask &= maskTrailingZeros<uint64_t>(C2);
3082
3083 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3084 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3085 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3086 if (isShiftedMask_64(Mask)) {
3087 unsigned Leading = llvm::countl_zero(Mask);
3088 unsigned Trailing = llvm::countr_zero(Mask);
3089 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3090 SDLoc DL(N);
3091 EVT VT = N.getValueType();
3092 Val = SDValue(CurDAG->getMachineNode(
3093 RISCV::SLLI, DL, VT, N0.getOperand(0),
3094 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3095 0);
3096 return true;
3097 }
3098 }
3099 }
3100 }
3101
3102 return false;
3103}
3104
3105static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3106 unsigned Bits,
3107 const TargetInstrInfo *TII) {
3108 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3109
3110 if (!MCOpcode)
3111 return false;
3112
3113 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3114 const uint64_t TSFlags = MCID.TSFlags;
3115 if (!RISCVII::hasSEWOp(TSFlags))
3116 return false;
3117 assert(RISCVII::hasVLOp(TSFlags));
3118
3119 bool HasGlueOp = User->getGluedNode() != nullptr;
3120 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3121 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3122 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3123 unsigned VLIdx =
3124 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3125 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3126
3127 if (UserOpNo == VLIdx)
3128 return false;
3129
3130 auto NumDemandedBits =
3131 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3132 return NumDemandedBits && Bits >= *NumDemandedBits;
3133}
3134
3135// Return true if all users of this SDNode* only consume the lower \p Bits.
3136// This can be used to form W instructions for add/sub/mul/shl even when the
3137// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3138// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3139// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3140// the add/sub/mul/shl to become non-W instructions. By checking the users we
3141// may be able to use a W instruction and CSE with the other instruction if
3142// this has happened. We could try to detect that the CSE opportunity exists
3143// before doing this, but that would be more complicated.
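// For example, if an i64 ADD feeds only an SW store and an ADDIW, every user
// reads at most the low 32 bits, so the ADD can be selected as ADDW and
// potentially CSE with an existing ADDW (illustrative).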
3144 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3145 const unsigned Depth) const {
3146 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3147 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3148 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3149 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3150 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3151 isa<ConstantSDNode>(Node) || Depth != 0) &&
3152 "Unexpected opcode");
3153
3154 if (Depth >= SelectionDAG::MaxRecursionDepth)
3155 return false;
3156
3157 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3158 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3159 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3160 return false;
3161
3162 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
3163 SDNode *User = *UI;
3164 // Users of this node should have already been instruction selected
3165 if (!User->isMachineOpcode())
3166 return false;
3167
3168 // TODO: Add more opcodes?
3169 switch (User->getMachineOpcode()) {
3170 default:
3171 if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
3172 break;
3173 return false;
3174 case RISCV::ADDW:
3175 case RISCV::ADDIW:
3176 case RISCV::SUBW:
3177 case RISCV::MULW:
3178 case RISCV::SLLW:
3179 case RISCV::SLLIW:
3180 case RISCV::SRAW:
3181 case RISCV::SRAIW:
3182 case RISCV::SRLW:
3183 case RISCV::SRLIW:
3184 case RISCV::DIVW:
3185 case RISCV::DIVUW:
3186 case RISCV::REMW:
3187 case RISCV::REMUW:
3188 case RISCV::ROLW:
3189 case RISCV::RORW:
3190 case RISCV::RORIW:
3191 case RISCV::CLZW:
3192 case RISCV::CTZW:
3193 case RISCV::CPOPW:
3194 case RISCV::SLLI_UW:
3195 case RISCV::FMV_W_X:
3196 case RISCV::FCVT_H_W:
3197 case RISCV::FCVT_H_W_INX:
3198 case RISCV::FCVT_H_WU:
3199 case RISCV::FCVT_H_WU_INX:
3200 case RISCV::FCVT_S_W:
3201 case RISCV::FCVT_S_W_INX:
3202 case RISCV::FCVT_S_WU:
3203 case RISCV::FCVT_S_WU_INX:
3204 case RISCV::FCVT_D_W:
3205 case RISCV::FCVT_D_W_INX:
3206 case RISCV::FCVT_D_WU:
3207 case RISCV::FCVT_D_WU_INX:
3208 case RISCV::TH_REVW:
3209 case RISCV::TH_SRRIW:
3210 if (Bits >= 32)
3211 break;
3212 return false;
3213 case RISCV::SLL:
3214 case RISCV::SRA:
3215 case RISCV::SRL:
3216 case RISCV::ROL:
3217 case RISCV::ROR:
3218 case RISCV::BSET:
3219 case RISCV::BCLR:
3220 case RISCV::BINV:
3221 // Shift amount operands only use log2(Xlen) bits.
3222 if (UI.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
3223 break;
3224 return false;
3225 case RISCV::SLLI:
3226 // SLLI only uses the lower (XLen - ShAmt) bits.
3227 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
3228 break;
3229 return false;
3230 case RISCV::ANDI:
3231 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3232 break;
3233 goto RecCheck;
3234 case RISCV::ORI: {
3235 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3236 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3237 break;
3238 [[fallthrough]];
3239 }
3240 case RISCV::AND:
3241 case RISCV::OR:
3242 case RISCV::XOR:
3243 case RISCV::XORI:
3244 case RISCV::ANDN:
3245 case RISCV::ORN:
3246 case RISCV::XNOR:
3247 case RISCV::SH1ADD:
3248 case RISCV::SH2ADD:
3249 case RISCV::SH3ADD:
3250 RecCheck:
3251 if (hasAllNBitUsers(User, Bits, Depth + 1))
3252 break;
3253 return false;
3254 case RISCV::SRLI: {
3255 unsigned ShAmt = User->getConstantOperandVal(1);
3256 // If we are shifting right by less than Bits, and users don't demand any
3257 // bits that were shifted into [Bits-1:0], then we can consider this as an
3258 // N-Bit user.
3259 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3260 break;
3261 return false;
3262 }
3263 case RISCV::SEXT_B:
3264 case RISCV::PACKH:
3265 if (Bits >= 8)
3266 break;
3267 return false;
3268 case RISCV::SEXT_H:
3269 case RISCV::FMV_H_X:
3270 case RISCV::ZEXT_H_RV32:
3271 case RISCV::ZEXT_H_RV64:
3272 case RISCV::PACKW:
3273 if (Bits >= 16)
3274 break;
3275 return false;
3276 case RISCV::PACK:
3277 if (Bits >= (Subtarget->getXLen() / 2))
3278 break;
3279 return false;
3280 case RISCV::ADD_UW:
3281 case RISCV::SH1ADD_UW:
3282 case RISCV::SH2ADD_UW:
3283 case RISCV::SH3ADD_UW:
3284 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3285 // 32 bits.
3286 if (UI.getOperandNo() == 0 && Bits >= 32)
3287 break;
3288 return false;
3289 case RISCV::SB:
3290 if (UI.getOperandNo() == 0 && Bits >= 8)
3291 break;
3292 return false;
3293 case RISCV::SH:
3294 if (UI.getOperandNo() == 0 && Bits >= 16)
3295 break;
3296 return false;
3297 case RISCV::SW:
3298 if (UI.getOperandNo() == 0 && Bits >= 32)
3299 break;
3300 return false;
3301 }
3302 }
3303
3304 return true;
3305}
3306
3307// Select a constant that can be represented as (sign_extend(imm5) << imm2).
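// For example, 96 encodes as Simm5 = 12 with Shl2 = 3 (12 << 3), while 100
// cannot be encoded: no shift in [0, 3] leaves a simm5 with the shifted-out
// bits all zero (illustrative).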
3309 SDValue &Shl2) {
3310 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3311 int64_t Offset = C->getSExtValue();
3312 int64_t Shift;
3313 for (Shift = 0; Shift < 4; Shift++)
3314 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3315 break;
3316
3317 // Constant cannot be encoded.
3318 if (Shift == 4)
3319 return false;
3320
3321 EVT Ty = N->getValueType(0);
3322 Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3323 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3324 return true;
3325 }
3326
3327 return false;
3328}
3329
3330// Select VL as a 5 bit immediate or a value that will become a register. This
3331 // allows us to choose between VSETIVLI or VSETVLI later.
3332 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3333 auto *C = dyn_cast<ConstantSDNode>(N);
3334 if (C && isUInt<5>(C->getZExtValue())) {
3335 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3336 N->getValueType(0));
3337 } else if (C && C->isAllOnes()) {
3338 // Treat all ones as VLMax.
3339 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3340 N->getValueType(0));
3341 } else if (isa<RegisterSDNode>(N) &&
3342 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3343 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3344 // as the register class. Convert X0 to a special immediate to pass the
3345 // MachineVerifier. This is recognized specially by the vsetvli insertion
3346 // pass.
3347 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3348 N->getValueType(0));
3349 } else {
3350 VL = N;
3351 }
3352
3353 return true;
3354}
3355
3357 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3358 if (!N.getOperand(0).isUndef())
3359 return SDValue();
3360 N = N.getOperand(1);
3361 }
3362 SDValue Splat = N;
3363 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3364 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3365 !Splat.getOperand(0).isUndef())
3366 return SDValue();
3367 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3368 return Splat;
3369}
3370
3371bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3372 SDValue Splat = findVSplat(N);
3373 if (!Splat)
3374 return false;
3375
3376 SplatVal = Splat.getOperand(1);
3377 return true;
3378}
3379
3380static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3381 SelectionDAG &DAG,
3382 const RISCVSubtarget &Subtarget,
3383 std::function<bool(int64_t)> ValidateImm) {
3384 SDValue Splat = findVSplat(N);
3385 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3386 return false;
3387
3388 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3389 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3390 "Unexpected splat operand type");
3391
3392 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3393 // type is wider than the resulting vector element type: an implicit
3394 // truncation first takes place. Therefore, perform a manual
3395 // truncation/sign-extension in order to ignore any truncated bits and catch
3396 // any zero-extended immediate.
3397 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3398 // sign-extending to (XLenVT -1).
3399 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3400
3401 int64_t SplatImm = SplatConst.getSExtValue();
3402
3403 if (!ValidateImm(SplatImm))
3404 return false;
3405
3406 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3407 return true;
3408}
3409
3410bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3411 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3412 [](int64_t Imm) { return isInt<5>(Imm); });
3413}
3414
3415bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3416 return selectVSplatImmHelper(
3417 N, SplatVal, *CurDAG, *Subtarget,
3418 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3419}
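The predicate above accepts exactly the range [-15, 16], i.e. immediates whose decrement still fits in a signed 5-bit field. A standalone restatement (plain C++, illustrative only; the function name is invented):

#include <cstdint>

bool isSimm5Plus1(int64_t Imm) {
  // Equivalent to (isInt<5>(Imm) && Imm != -16) || Imm == 16.
  return Imm >= -15 && Imm <= 16;
}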
3420
3421bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3422 SDValue &SplatVal) {
3423 return selectVSplatImmHelper(
3424 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3425 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3426 });
3427}
3428
3429bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3430 SDValue &SplatVal) {
3431 return selectVSplatImmHelper(
3432 N, SplatVal, *CurDAG, *Subtarget,
3433 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3434}
3435
3436bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3437 auto IsExtOrTrunc = [](SDValue N) {
3438 switch (N->getOpcode()) {
3439 case ISD::SIGN_EXTEND:
3440 case ISD::ZERO_EXTEND:
3441 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3442 // inactive elements will be undef.
3443 case RISCVISD::TRUNCATE_VECTOR_VL:
3444 case RISCVISD::VSEXT_VL:
3445 case RISCVISD::VZEXT_VL:
3446 return true;
3447 default:
3448 return false;
3449 }
3450 };
3451
3452 // We can have multiple nested nodes, so unravel them all if needed.
3453 while (IsExtOrTrunc(N)) {
3454 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3455 return false;
3456 N = N->getOperand(0);
3457 }
3458
3459 return selectVSplat(N, SplatVal);
3460}
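For illustration, DAG shapes (operand lists sketched, not exact) that the walk above accepts, provided each intermediate node has a single use and keeps an element width of at least 8 bits:
// (riscv_vzext_vl (vmv_v_x_vl undef, X, VL), mask, VL)   -> SplatVal = X
// (zero_extend (sign_extend (vmv_v_x_vl undef, X, VL)))  -> SplatVal = X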
3461
3462bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3463 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3464 if (!CFP)
3465 return false;
3466 const APFloat &APF = CFP->getValueAPF();
3467 // td can handle +0.0 already.
3468 if (APF.isPosZero())
3469 return false;
3470
3471 MVT VT = CFP->getSimpleValueType(0);
3472
3473 // Even if this FPImm requires an additional FNEG (i.e. the second element of
3474 // the returned pair is true) we still prefer FLI + FNEG over immediate
3475 // materialization as the latter might generate a longer instruction sequence.
3476 if (static_cast<const RISCVTargetLowering *>(TLI)
3477 ->getLegalZfaFPImm(APF, VT)
3478 .first >= 0)
3479 return false;
3480
3481 MVT XLenVT = Subtarget->getXLenVT();
3482 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3483 assert(APF.isNegZero() && "Unexpected constant.");
3484 return false;
3485 }
3486 SDLoc DL(N);
3487 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3488 *Subtarget);
3489 return true;
3490}
3491
3492bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3493 SDValue &Imm) {
3494 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3495 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3496
3497 if (!isInt<5>(ImmVal))
3498 return false;
3499
3500 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3501 return true;
3502 }
3503
3504 return false;
3505}
3506
3507// Try to remove sext.w if the input is a W instruction or can be made into
3508// a W instruction cheaply.
3509bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3510 // Look for the sext.w pattern, addiw rd, rs1, 0.
3511 if (N->getMachineOpcode() != RISCV::ADDIW ||
3512 !isNullConstant(N->getOperand(1)))
3513 return false;
3514
3515 SDValue N0 = N->getOperand(0);
3516 if (!N0.isMachineOpcode())
3517 return false;
3518
3519 switch (N0.getMachineOpcode()) {
3520 default:
3521 break;
3522 case RISCV::ADD:
3523 case RISCV::ADDI:
3524 case RISCV::SUB:
3525 case RISCV::MUL:
3526 case RISCV::SLLI: {
3527 // Convert sext.w+add/sub/mul to their W instructions. This will create
3528 // a new independent instruction. This improves latency.
3529 unsigned Opc;
3530 switch (N0.getMachineOpcode()) {
3531 default:
3532 llvm_unreachable("Unexpected opcode!");
3533 case RISCV::ADD: Opc = RISCV::ADDW; break;
3534 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3535 case RISCV::SUB: Opc = RISCV::SUBW; break;
3536 case RISCV::MUL: Opc = RISCV::MULW; break;
3537 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3538 }
3539
3540 SDValue N00 = N0.getOperand(0);
3541 SDValue N01 = N0.getOperand(1);
3542
3543 // Shift amount needs to be uimm5.
3544 if (N0.getMachineOpcode() == RISCV::SLLI &&
3545 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3546 break;
3547
3548 SDNode *Result =
3549 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3550 N00, N01);
3551 ReplaceUses(N, Result);
3552 return true;
3553 }
3554 case RISCV::ADDW:
3555 case RISCV::ADDIW:
3556 case RISCV::SUBW:
3557 case RISCV::MULW:
3558 case RISCV::SLLIW:
3559 case RISCV::PACKW:
3560 case RISCV::TH_MULAW:
3561 case RISCV::TH_MULAH:
3562 case RISCV::TH_MULSW:
3563 case RISCV::TH_MULSH:
3564 if (N0.getValueType() == MVT::i32)
3565 break;
3566
3567 // Result is already sign extended; just remove the sext.w.
3568 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3569 ReplaceUses(N, N0.getNode());
3570 return true;
3571 }
3572
3573 return false;
3574}
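For illustration, the two rewrites above on sketched DAG nodes (names invented):
// t1 = ADD a, b
// t2 = ADDIW t1, 0   =>   t2 = ADDW a, b             // new, independent W-form node
//
// t1 = ADDW a, b
// t2 = ADDIW t1, 0   =>   uses of t2 replaced by t1  // result already sign-extended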
3575
3576// After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
3577// that's glued to the pseudo. This tries to look up the value that was copied
3578// to V0.
3579static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
3580 // Check that we're using V0 as a mask register.
3581 if (!isa<RegisterSDNode>(MaskOp) ||
3582 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3583 return SDValue();
3584
3585 // The glued user defines V0.
3586 const auto *Glued = GlueOp.getNode();
3587
3588 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3589 return SDValue();
3590
3591 // Check that we're defining V0 as a mask register.
3592 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3593 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3594 return SDValue();
3595
3596 SDValue MaskSetter = Glued->getOperand(2);
3597
3598 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3599 // from an extract_subvector or insert_subvector.
3600 if (MaskSetter->isMachineOpcode() &&
3601 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3602 MaskSetter = MaskSetter->getOperand(0);
3603
3604 return MaskSetter;
3605}
3606
3607static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3608 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3609 SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
3610 if (!MaskSetter)
3611 return false;
3612
3613 const auto IsVMSet = [](unsigned Opc) {
3614 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3615 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3616 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3617 Opc == RISCV::PseudoVMSET_M_B8;
3618 };
3619
3620 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3621 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3622 // assume that it's all-ones? Same applies to its VL.
3623 return MaskSetter->isMachineOpcode() &&
3624 IsVMSet(MaskSetter.getMachineOpcode());
3625}
3626
3627// Return true if the mask of N is known to be an all-ones mask.
3628static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3629 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3630 N->getOperand(N->getNumOperands() - 1));
3631}
3632
3633static bool isImplicitDef(SDValue V) {
3634 if (!V.isMachineOpcode())
3635 return false;
3636 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
3637 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3638 if (!isImplicitDef(V.getOperand(I)))
3639 return false;
3640 return true;
3641 }
3642 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3643}
3644
3645// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3646// corresponding "unmasked" pseudo versions. The mask we're interested in will
3647// take the form of a V0 physical register operand, with a glued
3648// register-setting instruction.
3649bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3650 const RISCV::RISCVMaskedPseudoInfo *I =
3651 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3652 if (!I)
3653 return false;
3654
3655 unsigned MaskOpIdx = I->MaskOpIdx;
3656 if (!usesAllOnesMask(N, MaskOpIdx))
3657 return false;
3658
3659 // There are two classes of pseudos in the table - compares and
3660 // everything else. See the comment on RISCVMaskedPseudo for details.
3661 const unsigned Opc = I->UnmaskedPseudo;
3662 const MCInstrDesc &MCID = TII->get(Opc);
3663 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3664#ifndef NDEBUG
3665 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3668 "Masked and unmasked pseudos are inconsistent");
3669 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3670 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3671#endif
3672
3673 SmallVector<SDValue, 8> Ops;
3674 // Skip the passthru operand at index 0 if !UseTUPseudo.
3675 for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3676 // Skip the mask, and the Glue.
3677 SDValue Op = N->getOperand(I);
3678 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3679 continue;
3680 Ops.push_back(Op);
3681 }
3682
3683 // Transitively apply any node glued to our new node.
3684 const auto *Glued = N->getGluedNode();
3685 if (auto *TGlued = Glued->getGluedNode())
3686 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3687
3688 MachineSDNode *Result =
3689 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3690
3691 if (!N->memoperands_empty())
3692 CurDAG->setNodeMemRefs(Result, N->memoperands());
3693
3694 Result->setFlags(N->getFlags());
3695 ReplaceUses(N, Result);
3696
3697 return true;
3698}
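For illustration (a sketch; operand layout abbreviated and the opcode chosen only as an example), with $v0 defined by a PseudoVMSET_M_B* node:
// PseudoVADD_VV_M1_MASK passthru, a, b, $v0, vl, sew, policy
//   => PseudoVADD_VV_M1 passthru, a, b, vl, sew, policy
// When the unmasked pseudo has no policy operand (UseTUPseudo is false), the
// passthru operand is dropped as well.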
3699
3700static bool IsVMerge(SDNode *N) {
3701 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3702}
3703
3704static bool IsVMv(SDNode *N) {
3705 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
3706}
3707
3708static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
3709 switch (LMUL) {
3710 case RISCVII::LMUL_F8:
3711 return RISCV::PseudoVMSET_M_B1;
3712 case RISCVII::LMUL_F4:
3713 return RISCV::PseudoVMSET_M_B2;
3714 case RISCVII::LMUL_F2:
3715 return RISCV::PseudoVMSET_M_B4;
3716 case RISCVII::LMUL_1:
3717 return RISCV::PseudoVMSET_M_B8;
3718 case RISCVII::LMUL_2:
3719 return RISCV::PseudoVMSET_M_B16;
3720 case RISCVII::LMUL_4:
3721 return RISCV::PseudoVMSET_M_B32;
3722 case RISCVII::LMUL_8:
3723 return RISCV::PseudoVMSET_M_B64;
3725 llvm_unreachable("Unexpected LMUL");
3726 }
3727 llvm_unreachable("Unknown VLMUL enum");
3728}
3729
3730// Try to fold away VMERGE_VVM instructions into their true operands:
3731//
3732// %true = PseudoVADD_VV ...
3733// %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3734// ->
3735// %x = PseudoVADD_VV_MASK %false, ..., %mask
3736//
3737// We can only fold if vmerge's passthru operand, vmerge's false operand and
3738// %true's passthru operand (if it has one) are the same. This is because we
3739// have to consolidate them into one passthru operand in the result.
3740//
3741// If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3742// mask is all ones.
3743//
3744// We can also fold a VMV_V_V into its true operand, since it is equivalent to a
3745// VMERGE_VVM with an all ones mask.
3746//
3747// The resulting VL is the minimum of the two VLs.
3748//
3749// The resulting policy is the effective policy the vmerge would have had,
3750// i.e. whether or not its passthru operand was implicit-def.
3751bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3752 SDValue Passthru, False, True, VL, Mask, Glue;
3753 // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
3754 if (IsVMv(N)) {
3755 Passthru = N->getOperand(0);
3756 False = N->getOperand(0);
3757 True = N->getOperand(1);
3758 VL = N->getOperand(2);
3759 // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
3760 // mask later below.
3761 } else {
3762 assert(IsVMerge(N));
3763 Passthru = N->getOperand(0);
3764 False = N->getOperand(1);
3765 True = N->getOperand(2);
3766 Mask = N->getOperand(3);
3767 VL = N->getOperand(4);
3768 // We always have a glue node for the mask at v0.
3769 Glue = N->getOperand(N->getNumOperands() - 1);
3770 }
3771 assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3772 assert(!Glue || Glue.getValueType() == MVT::Glue);
3773
3774 // If the EEW of True is different from vmerge's SEW, then we can't fold.
3775 if (True.getSimpleValueType() != N->getSimpleValueType(0))
3776 return false;
3777
3778 // We require that either passthru and false are the same, or that passthru
3779 // is undefined.
3780 if (Passthru != False && !isImplicitDef(Passthru))
3781 return false;
3782
3783 assert(True.getResNo() == 0 &&
3784 "Expect True is the first output of an instruction.");
3785
3786 // N must be the only user of True.
3787 if (!True.hasOneUse())
3788 return false;
3789
3790 if (!True.isMachineOpcode())
3791 return false;
3792
3793 unsigned TrueOpc = True.getMachineOpcode();
3794 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3795 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3796 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3797
3798 bool IsMasked = false;
3799 const RISCV::RISCVMaskedPseudoInfo *Info =
3800 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3801 if (!Info && HasTiedDest) {
3802 Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3803 IsMasked = true;
3804 }
3805 assert(!(IsMasked && !HasTiedDest) && "Expected tied dest");
3806
3807 if (!Info)
3808 return false;
3809
3810 // If True has a passthru operand then it needs to be the same as vmerge's
3811 // False, since False will be used for the result's passthru operand.
3812 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3813 SDValue PassthruOpTrue = True->getOperand(0);
3814 if (False != PassthruOpTrue)
3815 return false;
3816 }
3817
3818 // If True is masked then the vmerge must have either the same mask or an all
3819 // 1s mask, since we're going to keep the mask from True.
3820 if (IsMasked && Mask) {
3821 // FIXME: Support mask agnostic True instruction which would have an
3822 // undef passthru operand.
3823 SDValue TrueMask =
3824 getMaskSetter(True->getOperand(Info->MaskOpIdx),
3825 True->getOperand(True->getNumOperands() - 1));
3826 assert(TrueMask);
3827 if (!usesAllOnesMask(Mask, Glue) && getMaskSetter(Mask, Glue) != TrueMask)
3828 return false;
3829 }
3830
3831 // Skip if True has side effect.
3832 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3833 return false;
3834
3835 // The last operand of a masked instruction may be glued.
3836 bool HasGlueOp = True->getGluedNode() != nullptr;
3837
3838 // The chain operand may exist either before the glued operands or in the last
3839 // position.
3840 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3841 bool HasChainOp =
3842 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3843
3844 if (HasChainOp) {
3845 // Avoid creating cycles in the DAG. We must ensure that none of the other
3846 // operands depend on True through its Chain.
3847 SmallVector<const SDNode *, 4> LoopWorklist;
3848 SmallPtrSet<const SDNode *, 16> Visited;
3849 LoopWorklist.push_back(False.getNode());
3850 if (Mask)
3851 LoopWorklist.push_back(Mask.getNode());
3852 LoopWorklist.push_back(VL.getNode());
3853 if (Glue)
3854 LoopWorklist.push_back(Glue.getNode());
3855 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3856 return false;
3857 }
3858
3859 // The vector policy operand may be present for masked intrinsics
3860 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3861 unsigned TrueVLIndex =
3862 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3863 SDValue TrueVL = True.getOperand(TrueVLIndex);
3864 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3865
3866 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3867 if (LHS == RHS)
3868 return LHS;
3869 if (isAllOnesConstant(LHS))
3870 return RHS;
3871 if (isAllOnesConstant(RHS))
3872 return LHS;
3873 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3874 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3875 if (!CLHS || !CRHS)
3876 return SDValue();
3877 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3878 };
3879
3880 // Because N and True must have the same passthru operand (or True's operand
3881 // is implicit_def), the "effective" body is the minimum of their VLs.
3882 SDValue OrigVL = VL;
3883 VL = GetMinVL(TrueVL, VL);
3884 if (!VL)
3885 return false;
3886
3887 // Some operations produce different elementwise results depending on the
3888 // active elements, like viota.m or vredsum. This transformation is illegal
3889 // for these if we change the active elements (i.e. mask or VL).
3890 const MCInstrDesc &TrueBaseMCID = TII->get(RISCV::getRVVMCOpcode(TrueOpc));
3891 if (RISCVII::activeElementsAffectResult(TrueBaseMCID.TSFlags)) {
3892 if (Mask && !usesAllOnesMask(Mask, Glue))
3893 return false;
3894 if (TrueVL != VL)
3895 return false;
3896 }
3897
3898 // If we end up changing the VL or mask of True, then we need to make sure it
3899 // doesn't raise any observable fp exceptions, since changing the active
3900 // elements will affect how fflags is set.
3901 if (TrueVL != VL || !IsMasked)
3902 if (mayRaiseFPException(True.getNode()) &&
3903 !True->getFlags().hasNoFPExcept())
3904 return false;
3905
3906 SDLoc DL(N);
3907
3908 // From the preconditions we checked above, we know the mask and thus glue
3909 // for the result node will be taken from True.
3910 if (IsMasked) {
3911 Mask = True->getOperand(Info->MaskOpIdx);
3912 Glue = True->getOperand(True->getNumOperands() - 1);
3913 assert(Glue.getValueType() == MVT::Glue);
3914 }
3915 // If the vmerge is actually a vmv.v.v (so there is no vmerge mask to reuse),
3916 // create an all-ones mask to use.
3917 else if (IsVMv(N)) {
3918 unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
3919 unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
3920 ElementCount EC = N->getValueType(0).getVectorElementCount();
3921 MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
3922
3923 SDValue AllOnesMask =
3924 SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
3925 SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
3926 RISCV::V0, AllOnesMask, SDValue());
3927 Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
3928 Glue = MaskCopy.getValue(1);
3929 }
3930
3931 unsigned MaskedOpc = Info->MaskedPseudo;
3932#ifndef NDEBUG
3933 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3935 "Expected instructions with mask have policy operand.");
3936 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3937 MCOI::TIED_TO) == 0 &&
3938 "Expected instructions with mask have a tied dest.");
3939#endif
3940
3941 // Use a tumu policy, relaxing it to tail agnostic provided that the passthru
3942 // operand is undefined.
3943 //
3944 // However, if the VL became smaller than what the vmerge had originally, then
3945 // elements past VL that were previously in the vmerge's body will have moved
3946 // to the tail. In that case we always need to use tail undisturbed to
3947 // preserve them.
3948 bool MergeVLShrunk = VL != OrigVL;
3949 uint64_t Policy = (isImplicitDef(Passthru) && !MergeVLShrunk)
3950 ? RISCVII::TAIL_AGNOSTIC
3951 : /*TUMU*/ 0;
3952 SDValue PolicyOp =
3953 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3954
3955
3956 SmallVector<SDValue, 8> Ops;
3957 Ops.push_back(False);
3958
3959 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3960 const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3961 assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3962 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3963
3964 Ops.push_back(Mask);
3965
3966 // For unmasked "VOp" with rounding mode operand, that is interfaces like
3967 // (..., rm, vl) or (..., rm, vl, policy).
3968 // Its masked version is (..., vm, rm, vl, policy).
3969 // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
3970 if (HasRoundingMode)
3971 Ops.push_back(True->getOperand(TrueVLIndex - 1));
3972
3973 Ops.append({VL, SEW, PolicyOp});
3974
3975 // Result node should have chain operand of True.
3976 if (HasChainOp)
3977 Ops.push_back(True.getOperand(TrueChainOpIdx));
3978
3979 // Add the glue for the CopyToReg of mask->v0.
3980 Ops.push_back(Glue);
3981
3982 MachineSDNode *Result =
3983 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3984 Result->setFlags(True->getFlags());
3985
3986 if (!cast<MachineSDNode>(True)->memoperands_empty())
3987 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
3988
3989 // Replace vmerge.vvm node by Result.
3990 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3991
3992 // Replace the other values of True, e.g. chain and VL.
3993 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3994 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3995
3996 return true;
3997}
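Illustrative VL and policy outcomes for the fold above (example constants, not from the source):
// TrueVL = 4,     vmerge VL = 8  -> folded VL = 4; the vmerge body shrank, so TUMU
// TrueVL = VLMAX, vmerge VL = 4  -> folded VL = 4; TAIL_AGNOSTIC if the passthru
//                                   is IMPLICIT_DEF, otherwise TUMU
// TrueVL and VL distinct non-constants -> GetMinVL yields no value; the fold is abandoned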
3998
3999bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
4000 bool MadeChange = false;
4001 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4002
4003 while (Position != CurDAG->allnodes_begin()) {
4004 SDNode *N = &*--Position;
4005 if (N->use_empty() || !N->isMachineOpcode())
4006 continue;
4007
4008 if (IsVMerge(N) || IsVMv(N))
4009 MadeChange |= performCombineVMergeAndVOps(N);
4010 }
4011 return MadeChange;
4012}
4013
4014/// If our passthru is an implicit_def, use noreg instead. This side
4015/// steps issues with MachineCSE not being able to CSE expressions with
4016/// IMPLICIT_DEF operands while preserving the semantic intent. See
4017/// pr64282 for context. Note that this transform is the last one
4018/// performed at ISEL DAG to DAG.
4019bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4020 bool MadeChange = false;
4021 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4022
4023 while (Position != CurDAG->allnodes_begin()) {
4024 SDNode *N = &*--Position;
4025 if (N->use_empty() || !N->isMachineOpcode())
4026 continue;
4027
4028 const unsigned Opc = N->getMachineOpcode();
4029 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4030 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4031 !isImplicitDef(N->getOperand(0)))
4032 continue;
4033
4034 SmallVector<SDValue, 8> Ops;
4035 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4036 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4037 SDValue Op = N->getOperand(I);
4038 Ops.push_back(Op);
4039 }
4040
4041 MachineSDNode *Result =
4042 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4043 Result->setFlags(N->getFlags());
4044 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4045 ReplaceUses(N, Result);
4046 MadeChange = true;
4047 }
4048 return MadeChange;
4049}
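For illustration (the load pseudo is only an example of an instruction whose operand 0 is a tied passthru):
// t0 = IMPLICIT_DEF
// t1 = PseudoVLE32_V_M1 t0, ptr, vl, sew, policy
//   => t1 = PseudoVLE32_V_M1 $noreg, ptr, vl, sew, policy
// With $noreg instead of a fresh IMPLICIT_DEF, MachineCSE can treat otherwise
// identical nodes as equal.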
4050
4051
4052// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4053// for instruction scheduling.
4054FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4055 CodeGenOptLevel OptLevel) {
4056 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4057}
4058
4059char RISCVDAGToDAGISelLegacy::ID = 0;
4060
4061RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4062 CodeGenOptLevel OptLevel)
4063 : SelectionDAGISelLegacy(
4064 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4065