RISCVISelDAGToDAG.cpp
1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISCV target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
14#include "MCTargetDesc/RISCVBaseInfo.h"
15#include "MCTargetDesc/RISCVMatInt.h"
16#include "RISCVISelLowering.h"
17#include "RISCVMachineFunctionInfo.h"
18#include "llvm/CodeGen/MachineFrameInfo.h"
19#include "llvm/IR/IntrinsicsRISCV.h"
20#include "llvm/Support/Alignment.h"
21#include "llvm/Support/Debug.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Support/raw_ostream.h"
24#include <optional>
25
26using namespace llvm;
27
28#define DEBUG_TYPE "riscv-isel"
29#define PASS_NAME "RISCV DAG->DAG Pattern Instruction Selection"
30
31namespace llvm::RISCV {
32#define GET_RISCVVSSEGTable_IMPL
33#define GET_RISCVVLSEGTable_IMPL
34#define GET_RISCVVLXSEGTable_IMPL
35#define GET_RISCVVSXSEGTable_IMPL
36#define GET_RISCVVLETable_IMPL
37#define GET_RISCVVSETable_IMPL
38#define GET_RISCVVLXTable_IMPL
39#define GET_RISCVVSXTable_IMPL
40#define GET_RISCVMaskedPseudosTable_IMPL
41#include "RISCVGenSearchableTables.inc"
42} // namespace llvm::RISCV
43
44static unsigned getLastNonGlueOrChainOpIdx(const SDNode *Node) {
45 assert(Node->getNumOperands() > 0 && "Node with no operands");
46 unsigned LastOpIdx = Node->getNumOperands() - 1;
47 if (Node->getOperand(LastOpIdx).getValueType() == MVT::Glue)
48 --LastOpIdx;
49 if (Node->getOperand(LastOpIdx).getValueType() == MVT::Other)
50 --LastOpIdx;
51 return LastOpIdx;
52}
53
54 static unsigned getVecPolicyOpIdx(const SDNode *Node, const MCInstrDesc &MCID) {
55 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags));
56 (void)MCID;
57 return getLastNonGlueOrChainOpIdx(Node);
58}
59
60 void RISCVDAGToDAGISel::PreprocessISelDAG() {
61 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
62
63 bool MadeChange = false;
64 while (Position != CurDAG->allnodes_begin()) {
65 SDNode *N = &*--Position;
66 if (N->use_empty())
67 continue;
68
69 SDValue Result;
70 switch (N->getOpcode()) {
71 case ISD::SPLAT_VECTOR: {
72 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
73 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
74 MVT VT = N->getSimpleValueType(0);
75 unsigned Opc =
76 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
77 SDLoc DL(N);
78 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
79 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
80 N->getOperand(0), VL);
81 break;
82 }
83 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
84 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
85 // load. Done after lowering and combining so that we have a chance to
86 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
87 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
88 MVT VT = N->getSimpleValueType(0);
89 SDValue Passthru = N->getOperand(0);
90 SDValue Lo = N->getOperand(1);
91 SDValue Hi = N->getOperand(2);
92 SDValue VL = N->getOperand(3);
93 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
94 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
95 "Unexpected VTs!");
96 MachineFunction &MF = CurDAG->getMachineFunction();
97 RISCVMachineFunctionInfo *FuncInfo =
98 MF.getInfo<RISCVMachineFunctionInfo>();
99 SDLoc DL(N);
100
101 // We use the same frame index we use for moving two i32s into 64-bit FPR.
102 // This is an analogous operation.
103 int FI = FuncInfo->getMoveF64FrameIndex(MF);
104 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
105 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
106 SDValue StackSlot =
107 CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
108
109 SDValue Chain = CurDAG->getEntryNode();
110 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
111
112 SDValue OffsetSlot =
113 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
114 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
115 Align(8));
116
117 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
118
119 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
120 SDValue IntID =
121 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
122 SDValue Ops[] = {Chain,
123 IntID,
124 Passthru,
125 StackSlot,
126 CurDAG->getRegister(RISCV::X0, MVT::i64),
127 VL};
128
129 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
130 MVT::i64, MPI, Align(8),
131 MachineMemOperand::MOLoad);
132 break;
133 }
134 }
135
136 if (Result) {
137 LLVM_DEBUG(dbgs() << "RISCV DAG preprocessing replacing:\nOld: ");
138 LLVM_DEBUG(N->dump(CurDAG));
139 LLVM_DEBUG(dbgs() << "\nNew: ");
140 LLVM_DEBUG(Result->dump(CurDAG));
141 LLVM_DEBUG(dbgs() << "\n");
142
143 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
144 MadeChange = true;
145 }
146 }
147
148 if (MadeChange)
149 CurDAG->RemoveDeadNodes();
150}
151
152 void RISCVDAGToDAGISel::PostprocessISelDAG() {
153 HandleSDNode Dummy(CurDAG->getRoot());
154 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
155
156 bool MadeChange = false;
157 while (Position != CurDAG->allnodes_begin()) {
158 SDNode *N = &*--Position;
159 // Skip dead nodes and any non-machine opcodes.
160 if (N->use_empty() || !N->isMachineOpcode())
161 continue;
162
163 MadeChange |= doPeepholeSExtW(N);
164 MadeChange |= doPeepholeMaskedRVV(N);
165 }
166
167 CurDAG->setRoot(Dummy.getValue());
168
169 MadeChange |= doPeepholeMergeVVMFold();
170
171 if (MadeChange)
172 CurDAG->RemoveDeadNodes();
173}
174
175static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
176 RISCVMatInt::InstSeq &Seq) {
177 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178 for (RISCVMatInt::Inst &Inst : Seq) {
179 SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
180 SDNode *Result = nullptr;
181 switch (Inst.getOpndKind()) {
182 case RISCVMatInt::Imm:
183 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
184 break;
185 case RISCVMatInt::RegX0:
186 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
187 CurDAG->getRegister(RISCV::X0, VT));
188 break;
189 case RISCVMatInt::RegReg:
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
191 break;
192 case RISCVMatInt::RegImm:
193 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
194 break;
195 }
196
197 // Only the first instruction has X0 as its source.
198 SrcReg = SDValue(Result, 0);
199 }
200
201 return SrcReg;
202}
203
204static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
205 int64_t Imm, const RISCVSubtarget &Subtarget) {
206 RISCVMatInt::InstSeq Seq =
207 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
208
209 return selectImmSeq(CurDAG, DL, VT, Seq);
210}
211
212 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
213 unsigned NF, RISCVII::VLMUL LMUL) {
214 static const unsigned M1TupleRegClassIDs[] = {
215 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
216 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
217 RISCV::VRN8M1RegClassID};
218 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
219 RISCV::VRN3M2RegClassID,
220 RISCV::VRN4M2RegClassID};
221
222 assert(Regs.size() >= 2 && Regs.size() <= 8);
223
224 unsigned RegClassID;
225 unsigned SubReg0;
226 switch (LMUL) {
227 default:
228 llvm_unreachable("Invalid LMUL.");
229 case RISCVII::VLMUL::LMUL_F8:
230 case RISCVII::VLMUL::LMUL_F4:
231 case RISCVII::VLMUL::LMUL_F2:
232 case RISCVII::VLMUL::LMUL_1:
233 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
234 "Unexpected subreg numbering");
235 SubReg0 = RISCV::sub_vrm1_0;
236 RegClassID = M1TupleRegClassIDs[NF - 2];
237 break;
238 case RISCVII::VLMUL::LMUL_2:
239 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
240 "Unexpected subreg numbering");
241 SubReg0 = RISCV::sub_vrm2_0;
242 RegClassID = M2TupleRegClassIDs[NF - 2];
243 break;
244 case RISCVII::VLMUL::LMUL_4:
245 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
246 "Unexpected subreg numbering");
247 SubReg0 = RISCV::sub_vrm4_0;
248 RegClassID = RISCV::VRN2M4RegClassID;
249 break;
250 }
251
252 SDLoc DL(Regs[0]);
253 SmallVector<SDValue, 8> Ops;
254
255 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
256
257 for (unsigned I = 0; I < Regs.size(); ++I) {
258 Ops.push_back(Regs[I]);
259 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
260 }
261 SDNode *N =
262 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
263 return SDValue(N, 0);
264}
265
266 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
267 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
268 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
269 bool IsLoad, MVT *IndexVT) {
270 SDValue Chain = Node->getOperand(0);
271 SDValue Glue;
272
273 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
274
275 if (IsStridedOrIndexed) {
276 Operands.push_back(Node->getOperand(CurOp++)); // Index.
277 if (IndexVT)
278 *IndexVT = Operands.back()->getSimpleValueType(0);
279 }
280
281 if (IsMasked) {
282 // Mask needs to be copied to V0.
283 SDValue Mask = Node->getOperand(CurOp++);
284 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
285 Glue = Chain.getValue(1);
286 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
287 }
288 SDValue VL;
289 selectVLOp(Node->getOperand(CurOp++), VL);
290 Operands.push_back(VL);
291
292 MVT XLenVT = Subtarget->getXLenVT();
293 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
294 Operands.push_back(SEWOp);
295
296 // Masked load has the tail policy argument.
297 if (IsMasked && IsLoad) {
298 // Policy must be a constant.
299 uint64_t Policy = Node->getConstantOperandVal(CurOp++);
300 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
301 Operands.push_back(PolicyOp);
302 }
303
304 Operands.push_back(Chain); // Chain.
305 if (Glue)
306 Operands.push_back(Glue);
307}
308
309static bool isAllUndef(ArrayRef<SDValue> Values) {
310 return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); });
311}
312
313void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
314 bool IsStrided) {
315 SDLoc DL(Node);
316 unsigned NF = Node->getNumValues() - 1;
317 MVT VT = Node->getSimpleValueType(0);
318 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
319 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
320
321 unsigned CurOp = 2;
322 SmallVector<SDValue, 8> Operands;
323
324 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
325 Node->op_begin() + CurOp + NF);
326 bool IsTU = IsMasked || !isAllUndef(Regs);
327 if (IsTU) {
328 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
329 Operands.push_back(Merge);
330 }
331 CurOp += NF;
332
333 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
334 Operands, /*IsLoad=*/true);
335
336 const RISCV::VLSEGPseudo *P =
337 RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
338 static_cast<unsigned>(LMUL));
339 MachineSDNode *Load =
340 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
341
342 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
343 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
344
345 SDValue SuperReg = SDValue(Load, 0);
346 for (unsigned I = 0; I < NF; ++I) {
347 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
348 ReplaceUses(SDValue(Node, I),
349 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
350 }
351
352 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
353 CurDAG->RemoveDeadNode(Node);
354}
355
356void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
357 SDLoc DL(Node);
358 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
359 MVT VT = Node->getSimpleValueType(0);
360 MVT XLenVT = Subtarget->getXLenVT();
361 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
362 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
363
364 unsigned CurOp = 2;
365 SmallVector<SDValue, 8> Operands;
366
367 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
368 Node->op_begin() + CurOp + NF);
369 bool IsTU = IsMasked || !isAllUndef(Regs);
370 if (IsTU) {
371 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
372 Operands.push_back(MaskedOff);
373 }
374 CurOp += NF;
375
376 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
377 /*IsStridedOrIndexed*/ false, Operands,
378 /*IsLoad=*/true);
379
380 const RISCV::VLSEGPseudo *P =
381 RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
382 Log2SEW, static_cast<unsigned>(LMUL));
383 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
384 XLenVT, MVT::Other, Operands);
385
386 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
387 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
388
389 SDValue SuperReg = SDValue(Load, 0);
390 for (unsigned I = 0; I < NF; ++I) {
391 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
392 ReplaceUses(SDValue(Node, I),
393 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
394 }
395
396 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
397 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
398 CurDAG->RemoveDeadNode(Node);
399}
400
401void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
402 bool IsOrdered) {
403 SDLoc DL(Node);
404 unsigned NF = Node->getNumValues() - 1;
405 MVT VT = Node->getSimpleValueType(0);
406 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
407 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
408
409 unsigned CurOp = 2;
410 SmallVector<SDValue, 8> Operands;
411
412 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
413 Node->op_begin() + CurOp + NF);
414 bool IsTU = IsMasked || !isAllUndef(Regs);
415 if (IsTU) {
416 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
417 Operands.push_back(MaskedOff);
418 }
419 CurOp += NF;
420
421 MVT IndexVT;
422 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
423 /*IsStridedOrIndexed*/ true, Operands,
424 /*IsLoad=*/true, &IndexVT);
425
427 "Element count mismatch");
428
429 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
430 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
431 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
432 report_fatal_error("The V extension does not support EEW=64 for index "
433 "values when XLEN=32");
434 }
435 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
436 NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
437 static_cast<unsigned>(IndexLMUL));
438 MachineSDNode *Load =
439 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
440
441 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
442 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
443
444 SDValue SuperReg = SDValue(Load, 0);
445 for (unsigned I = 0; I < NF; ++I) {
446 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
447 ReplaceUses(SDValue(Node, I),
448 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
449 }
450
451 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
452 CurDAG->RemoveDeadNode(Node);
453}
454
455void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
456 bool IsStrided) {
457 SDLoc DL(Node);
458 unsigned NF = Node->getNumOperands() - 4;
459 if (IsStrided)
460 NF--;
461 if (IsMasked)
462 NF--;
463 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
464 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
465 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
466 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
467 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
468
469 SmallVector<SDValue, 8> Operands;
470 Operands.push_back(StoreVal);
471 unsigned CurOp = 2 + NF;
472
473 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
474 Operands);
475
476 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
477 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
478 MachineSDNode *Store =
479 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
480
481 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
482 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
483
484 ReplaceNode(Node, Store);
485}
486
487void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
488 bool IsOrdered) {
489 SDLoc DL(Node);
490 unsigned NF = Node->getNumOperands() - 5;
491 if (IsMasked)
492 --NF;
493 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
494 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
495 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
496 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
497 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
498
499 SmallVector<SDValue, 8> Operands;
500 Operands.push_back(StoreVal);
501 unsigned CurOp = 2 + NF;
502
503 MVT IndexVT;
504 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
505 /*IsStridedOrIndexed*/ true, Operands,
506 /*IsLoad=*/false, &IndexVT);
507
509 "Element count mismatch");
510
511 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
512 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
513 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
514 report_fatal_error("The V extension does not support EEW=64 for index "
515 "values when XLEN=32");
516 }
517 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
518 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
519 static_cast<unsigned>(IndexLMUL));
520 MachineSDNode *Store =
521 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
522
523 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
524 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
525
526 ReplaceNode(Node, Store);
527}
528
529 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
530 if (!Subtarget->hasVInstructions())
531 return;
532
533 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
534
535 SDLoc DL(Node);
536 MVT XLenVT = Subtarget->getXLenVT();
537
538 unsigned IntNo = Node->getConstantOperandVal(0);
539
540 assert((IntNo == Intrinsic::riscv_vsetvli ||
541 IntNo == Intrinsic::riscv_vsetvlimax) &&
542 "Unexpected vsetvli intrinsic");
543
544 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
545 unsigned Offset = (VLMax ? 1 : 2);
546
547 assert(Node->getNumOperands() == Offset + 2 &&
548 "Unexpected number of operands");
549
550 unsigned SEW =
551 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
552 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
553 Node->getConstantOperandVal(Offset + 1) & 0x7);
554
555 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
556 /*MaskAgnostic*/ true);
557 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
558
559 SmallVector<EVT, 2> VTs = {XLenVT};
560
561 SDValue VLOperand;
562 unsigned Opcode = RISCV::PseudoVSETVLI;
563 if (VLMax) {
564 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
565 Opcode = RISCV::PseudoVSETVLIX0;
566 } else {
567 VLOperand = Node->getOperand(1);
568
569 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
570 uint64_t AVL = C->getZExtValue();
571 if (isUInt<5>(AVL)) {
572 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
573 SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
574 ReplaceNode(
575 Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
576 return;
577 }
578 }
579 }
580
581 SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
582
583 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
584}
585
586 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
587 MVT VT = Node->getSimpleValueType(0);
588 unsigned Opcode = Node->getOpcode();
589 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
590 "Unexpected opcode");
591 SDLoc DL(Node);
592
593 // For operations of the form (x << C1) op C2, check if we can use
594 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
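// For example (illustrative): (x << 8) | 0xF000 cannot use ORI directly
// because 0xF000 does not fit in a simm12, but rewriting it as
// ((x | 0xF0) << 8) gives an ORI followed by an SLLI.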
595 SDValue N0 = Node->getOperand(0);
596 SDValue N1 = Node->getOperand(1);
597
598 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
599 if (!Cst)
600 return false;
601
602 int64_t Val = Cst->getSExtValue();
603
604 // Check if immediate can already use ANDI/ORI/XORI.
605 if (isInt<12>(Val))
606 return false;
607
608 SDValue Shift = N0;
609
610 // If Val is simm32 and we have a sext_inreg from i32, then the binop
611 // produces at least 33 sign bits. We can peek through the sext_inreg and use
612 // a SLLIW at the end.
613 bool SignExt = false;
614 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
615 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
616 SignExt = true;
617 Shift = N0.getOperand(0);
618 }
619
620 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
621 return false;
622
623 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
624 if (!ShlCst)
625 return false;
626
627 uint64_t ShAmt = ShlCst->getZExtValue();
628
629 // Make sure that we don't change the operation by removing bits.
630 // This only matters for OR and XOR, AND is unaffected.
631 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
632 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
633 return false;
634
635 int64_t ShiftedVal = Val >> ShAmt;
636 if (!isInt<12>(ShiftedVal))
637 return false;
638
639 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
640 if (SignExt && ShAmt >= 32)
641 return false;
642
643 // Ok, we can reorder to get a smaller immediate.
644 unsigned BinOpc;
645 switch (Opcode) {
646 default: llvm_unreachable("Unexpected opcode");
647 case ISD::AND: BinOpc = RISCV::ANDI; break;
648 case ISD::OR: BinOpc = RISCV::ORI; break;
649 case ISD::XOR: BinOpc = RISCV::XORI; break;
650 }
651
652 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
653
654 SDNode *BinOp =
655 CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
656 CurDAG->getTargetConstant(ShiftedVal, DL, VT));
657 SDNode *SLLI =
658 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
659 CurDAG->getTargetConstant(ShAmt, DL, VT));
660 ReplaceNode(Node, SLLI);
661 return true;
662}
663
664 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
665 // Only supported with XTHeadBb at the moment.
666 if (!Subtarget->hasVendorXTHeadBb())
667 return false;
668
669 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
670 if (!N1C)
671 return false;
672
673 SDValue N0 = Node->getOperand(0);
674 if (!N0.hasOneUse())
675 return false;
676
677 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
678 MVT VT) {
679 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
680 CurDAG->getTargetConstant(Msb, DL, VT),
681 CurDAG->getTargetConstant(Lsb, DL, VT));
682 };
683
684 SDLoc DL(Node);
685 MVT VT = Node->getSimpleValueType(0);
686 const unsigned RightShAmt = N1C->getZExtValue();
687
688 // Transform (sra (shl X, C1) C2) with C1 < C2
689 // -> (TH.EXT X, msb, lsb)
690 if (N0.getOpcode() == ISD::SHL) {
691 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
692 if (!N01C)
693 return false;
694
695 const unsigned LeftShAmt = N01C->getZExtValue();
696 // Make sure that this is a bitfield extraction (i.e., the shift-right
697 // amount can not be less than the left-shift).
698 if (LeftShAmt > RightShAmt)
699 return false;
700
701 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
702 const unsigned Msb = MsbPlusOne - 1;
703 const unsigned Lsb = RightShAmt - LeftShAmt;
704
705 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
706 ReplaceNode(Node, TH_EXT);
707 return true;
708 }
709
710 // Transform (sra (sext_inreg X, _), C) ->
711 // (TH.EXT X, msb, lsb)
712 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
713 unsigned ExtSize =
714 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
715
716 // ExtSize of 32 should use sraiw via tablegen pattern.
717 if (ExtSize == 32)
718 return false;
719
720 const unsigned Msb = ExtSize - 1;
721 const unsigned Lsb = RightShAmt;
722
723 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
724 ReplaceNode(Node, TH_EXT);
725 return true;
726 }
727
728 return false;
729}
730
731 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
732 // Target does not support indexed loads.
733 if (!Subtarget->hasVendorXTHeadMemIdx())
734 return false;
735
736 LoadSDNode *Ld = cast<LoadSDNode>(Node);
737 ISD::MemIndexedMode AM = Ld->getAddressingMode();
738 if (AM == ISD::UNINDEXED)
739 return false;
740
741 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
742 if (!C)
743 return false;
744
745 EVT LoadVT = Ld->getMemoryVT();
746 bool IsPre = (AM == ISD::PRE_INC || AM == ISD::PRE_DEC);
747 bool IsPost = (AM == ISD::POST_INC || AM == ISD::POST_DEC);
748 int64_t Offset = C->getSExtValue();
749
750 // Convert decrements to increments by a negative quantity.
751 if (AM == ISD::PRE_DEC || AM == ISD::POST_DEC)
752 Offset = -Offset;
753
754 // The constants that can be encoded in the THeadMemIdx instructions
755 // are of the form (sign_extend(imm5) << imm2).
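// For example (illustrative), an offset of 48 is encodable as 12 << 2
// (imm5=12, imm2=2), while an offset of 17 is not.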
756 int64_t Shift;
757 for (Shift = 0; Shift < 4; Shift++)
758 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
759 break;
760
761 // Constant cannot be encoded.
762 if (Shift == 4)
763 return false;
764
765 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
766 unsigned Opcode;
767 if (LoadVT == MVT::i8 && IsPre)
768 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
769 else if (LoadVT == MVT::i8 && IsPost)
770 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
771 else if (LoadVT == MVT::i16 && IsPre)
772 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
773 else if (LoadVT == MVT::i16 && IsPost)
774 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
775 else if (LoadVT == MVT::i32 && IsPre)
776 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
777 else if (LoadVT == MVT::i32 && IsPost)
778 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
779 else if (LoadVT == MVT::i64 && IsPre)
780 Opcode = RISCV::TH_LDIB;
781 else if (LoadVT == MVT::i64 && IsPost)
782 Opcode = RISCV::TH_LDIA;
783 else
784 return false;
785
786 EVT Ty = Ld->getOffset().getValueType();
787 SDValue Ops[] = {Ld->getBasePtr(),
788 CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
789 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
790 Ld->getChain()};
791 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
792 Ld->getValueType(1), MVT::Other, Ops);
793
794 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
795 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
796
797 ReplaceNode(Node, New);
798
799 return true;
800}
801
802 void RISCVDAGToDAGISel::Select(SDNode *Node) {
803 // If we have a custom node, we have already selected.
804 if (Node->isMachineOpcode()) {
805 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
806 Node->setNodeId(-1);
807 return;
808 }
809
810 // Instruction Selection not handled by the auto-generated tablegen selection
811 // should be handled here.
812 unsigned Opcode = Node->getOpcode();
813 MVT XLenVT = Subtarget->getXLenVT();
814 SDLoc DL(Node);
815 MVT VT = Node->getSimpleValueType(0);
816
817 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
818
819 switch (Opcode) {
820 case ISD::Constant: {
821 assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
822 auto *ConstNode = cast<ConstantSDNode>(Node);
823 if (ConstNode->isZero()) {
824 SDValue New =
825 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
826 ReplaceNode(Node, New.getNode());
827 return;
828 }
829 int64_t Imm = ConstNode->getSExtValue();
830 // If the upper XLen-16 bits are not used, try to convert this to a simm12
831 // by sign extending bit 15.
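// For example (illustrative), 0xFFF8 becomes -8, which a single ADDI can
// materialize, when all users only read the low 16 bits of the result.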
832 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
833 hasAllHUsers(Node))
834 Imm = SignExtend64<16>(Imm);
835 // If the upper 32-bits are not used try to convert this into a simm32 by
836 // sign extending bit 32.
837 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
838 Imm = SignExtend64<32>(Imm);
839
840 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
841 return;
842 }
843 case ISD::ConstantFP: {
844 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
845 int FPImm = static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(
846 APF, VT);
847 if (FPImm >= 0) {
848 unsigned Opc;
849 switch (VT.SimpleTy) {
850 default:
851 llvm_unreachable("Unexpected size");
852 case MVT::f16:
853 Opc = RISCV::FLI_H;
854 break;
855 case MVT::f32:
856 Opc = RISCV::FLI_S;
857 break;
858 case MVT::f64:
859 Opc = RISCV::FLI_D;
860 break;
861 }
862
863 SDNode *Res = CurDAG->getMachineNode(
864 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
865 ReplaceNode(Node, Res);
866 return;
867 }
868
869 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
870 SDValue Imm;
871 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
872 // create an integer immediate.
873 if (APF.isPosZero() || NegZeroF64)
874 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
875 else
876 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
877 *Subtarget);
878
879 unsigned Opc;
880 switch (VT.SimpleTy) {
881 default:
882 llvm_unreachable("Unexpected size");
883 case MVT::f16:
884 Opc = RISCV::FMV_H_X;
885 break;
886 case MVT::f32:
887 Opc = RISCV::FMV_W_X;
888 break;
889 case MVT::f64:
890 // For RV32, we can't move from a GPR, we need to convert instead. This
891 // should only happen for +0.0 and -0.0.
892 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
893 Opc = Subtarget->is64Bit() ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
894 break;
895 }
896
897 SDNode *Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
898
899 // For f64 -0.0, we need to insert a fneg.d idiom.
900 if (NegZeroF64)
901 Res = CurDAG->getMachineNode(RISCV::FSGNJN_D, DL, VT, SDValue(Res, 0),
902 SDValue(Res, 0));
903
904 ReplaceNode(Node, Res);
905 return;
906 }
907 case RISCVISD::SplitF64: {
908 if (!Subtarget->hasStdExtZfa())
909 break;
910 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
911 "Unexpected subtarget");
912
913 // With Zfa, lower to fmv.x.w and fmvh.x.d.
914 if (!SDValue(Node, 0).use_empty()) {
915 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
916 Node->getOperand(0));
917 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
918 }
919 if (!SDValue(Node, 1).use_empty()) {
920 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
921 Node->getOperand(0));
922 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
923 }
924
925 CurDAG->RemoveDeadNode(Node);
926 return;
927 }
928 case ISD::SHL: {
929 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
930 if (!N1C)
931 break;
932 SDValue N0 = Node->getOperand(0);
933 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
934 !isa<ConstantSDNode>(N0.getOperand(1)))
935 break;
936 unsigned ShAmt = N1C->getZExtValue();
937 uint64_t Mask = N0.getConstantOperandVal(1);
938
939 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
940 // 32 leading zeros and C3 trailing zeros.
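// For example (illustrative, RV64): (shl (and X, 0xFFFFFFF0), 2) becomes
// (slli (srliw X, 4), 6), avoiding materializing 0xFFFFFFF0.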
941 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
942 unsigned XLen = Subtarget->getXLen();
943 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
944 unsigned TrailingZeros = llvm::countr_zero(Mask);
945 if (TrailingZeros > 0 && LeadingZeros == 32) {
946 SDNode *SRLIW = CurDAG->getMachineNode(
947 RISCV::SRLIW, DL, VT, N0->getOperand(0),
948 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
949 SDNode *SLLI = CurDAG->getMachineNode(
950 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
951 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
952 ReplaceNode(Node, SLLI);
953 return;
954 }
955 }
956 break;
957 }
958 case ISD::SRL: {
959 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
960 if (!N1C)
961 break;
962 SDValue N0 = Node->getOperand(0);
963 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
964 break;
965 unsigned ShAmt = N1C->getZExtValue();
966 uint64_t Mask = N0.getConstantOperandVal(1);
967
968 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
969 // 32 leading zeros and C3 trailing zeros.
970 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
971 unsigned XLen = Subtarget->getXLen();
972 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
973 unsigned TrailingZeros = llvm::countr_zero(Mask);
974 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
975 SDNode *SRLIW = CurDAG->getMachineNode(
976 RISCV::SRLIW, DL, VT, N0->getOperand(0),
977 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
978 SDNode *SLLI = CurDAG->getMachineNode(
979 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
980 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
981 ReplaceNode(Node, SLLI);
982 return;
983 }
984 }
985
986 // Optimize (srl (and X, C2), C) ->
987 // (srli (slli X, XLen-C3), (XLen-C3) + C)
988 // Where C2 is a mask with C3 trailing ones.
989 // Taking into account that the C2 may have had lower bits unset by
990 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
991 // This pattern occurs when type legalizing right shifts for types with
992 // less than XLen bits.
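// For example (illustrative, RV64): (srl (and X, 0xFFFF), 8) becomes
// (srli (slli X, 48), 56) when the AND has no other users.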
993 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
994 if (!isMask_64(Mask))
995 break;
996 unsigned TrailingOnes = llvm::countr_one(Mask);
997 if (ShAmt >= TrailingOnes)
998 break;
999 // If the mask has 32 trailing ones, use SRLIW.
1000 if (TrailingOnes == 32) {
1001 SDNode *SRLIW =
1002 CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, N0->getOperand(0),
1003 CurDAG->getTargetConstant(ShAmt, DL, VT));
1004 ReplaceNode(Node, SRLIW);
1005 return;
1006 }
1007
1008 // Only do the remaining transforms if the AND has one use.
1009 if (!N0.hasOneUse())
1010 break;
1011
1012 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1013 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1014 SDNode *BEXTI = CurDAG->getMachineNode(
1015 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1016 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1017 ReplaceNode(Node, BEXTI);
1018 return;
1019 }
1020
1021 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1022 SDNode *SLLI =
1023 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1024 CurDAG->getTargetConstant(LShAmt, DL, VT));
1025 SDNode *SRLI = CurDAG->getMachineNode(
1026 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1027 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1028 ReplaceNode(Node, SRLI);
1029 return;
1030 }
1031 case ISD::SRA: {
1032 if (trySignedBitfieldExtract(Node))
1033 return;
1034
1035 // Optimize (sra (sext_inreg X, i16), C) ->
1036 // (srai (slli X, XLen-16), (XLen-16) + C)
1037 // And (sra (sext_inreg X, i8), C) ->
1038 // (srai (slli X, XLen-8), (XLen-8) + C)
1039 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1040 // This transform matches the code we get without Zbb. The shifts are more
1041 // compressible, and this can help expose CSE opportunities in the sdiv by
1042 // constant optimization.
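// For example (illustrative, RV64): (sra (sext_inreg X, i16), 3) becomes
// (srai (slli X, 48), 51).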
1043 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1044 if (!N1C)
1045 break;
1046 SDValue N0 = Node->getOperand(0);
1047 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1048 break;
1049 unsigned ShAmt = N1C->getZExtValue();
1050 unsigned ExtSize =
1051 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1052 // ExtSize of 32 should use sraiw via tablegen pattern.
1053 if (ExtSize >= 32 || ShAmt >= ExtSize)
1054 break;
1055 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1056 SDNode *SLLI =
1057 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1058 CurDAG->getTargetConstant(LShAmt, DL, VT));
1059 SDNode *SRAI = CurDAG->getMachineNode(
1060 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1061 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1062 ReplaceNode(Node, SRAI);
1063 return;
1064 }
1065 case ISD::OR:
1066 case ISD::XOR:
1067 if (tryShrinkShlLogicImm(Node))
1068 return;
1069
1070 break;
1071 case ISD::AND: {
1072 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1073 if (!N1C)
1074 break;
1075 uint64_t C1 = N1C->getZExtValue();
1076 const bool isC1Mask = isMask_64(C1);
1077 const bool isC1ANDI = isInt<12>(C1);
1078
1079 SDValue N0 = Node->getOperand(0);
1080
1081 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1082 SDValue X, unsigned Msb,
1083 unsigned Lsb) {
1084 if (!Subtarget->hasVendorXTHeadBb())
1085 return false;
1086
1087 SDNode *TH_EXTU = CurDAG->getMachineNode(
1088 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1089 CurDAG->getTargetConstant(Lsb, DL, VT));
1090 ReplaceNode(Node, TH_EXTU);
1091 return true;
1092 };
1093
1094 bool LeftShift = N0.getOpcode() == ISD::SHL;
1095 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1096 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1097 if (!C)
1098 break;
1099 unsigned C2 = C->getZExtValue();
1100 unsigned XLen = Subtarget->getXLen();
1101 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1102
1103 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1104 // shift pair might offer more compression opportunities.
1105 // TODO: We could check for C extension here, but we don't have many lit
1106 // tests with the C extension enabled so not checking gets better
1107 // coverage.
1108 // TODO: What if ANDI is faster than the shift?
1109 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1110
1111 // Clear irrelevant bits in the mask.
1112 if (LeftShift)
1113 C1 &= maskTrailingZeros<uint64_t>(C2);
1114 else
1115 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1116
1117 // Some transforms should only be done if the shift has a single use or
1118 // the AND would become (srli (slli X, 32), 32)
1119 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1120
1121 SDValue X = N0.getOperand(0);
1122
1123 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1124 // with c3 leading zeros.
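// For example (illustrative, RV64, without XTheadBb): (and (srl x, 4), 0xFFFF)
// becomes (srli (slli x, 44), 48).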
1125 if (!LeftShift && isC1Mask) {
1126 unsigned Leading = XLen - llvm::bit_width(C1);
1127 if (C2 < Leading) {
1128 // If the number of leading zeros is C2+32 this can be SRLIW.
1129 if (C2 + 32 == Leading) {
1130 SDNode *SRLIW = CurDAG->getMachineNode(
1131 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1132 ReplaceNode(Node, SRLIW);
1133 return;
1134 }
1135
1136 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1137 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1138 //
1139 // This pattern occurs when (i32 (srl (sra Y, 31), c3 - 32)) is type
1140 // legalized and goes through DAG combine.
1141 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1142 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1143 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1144 SDNode *SRAIW =
1145 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1146 CurDAG->getTargetConstant(31, DL, VT));
1147 SDNode *SRLIW = CurDAG->getMachineNode(
1148 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1149 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1150 ReplaceNode(Node, SRLIW);
1151 return;
1152 }
1153
1154 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1155 // available.
1156 // Transform (and (srl x, C2), C1)
1157 // -> (<bfextract> x, msb, lsb)
1158 //
1159 // Make sure to keep this below the SRLIW cases, as we always want to
1160 // prefer the more common instruction.
1161 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1162 const unsigned Lsb = C2;
1163 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1164 return;
1165
1166 // (srli (slli x, c3-c2), c3).
1167 // Skip if we could use (zext.w (sraiw X, C2)).
1168 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1169 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1170 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1171 // Also Skip if we can use bexti or th.tst.
1172 Skip |= HasBitTest && Leading == XLen - 1;
1173 if (OneUseOrZExtW && !Skip) {
1174 SDNode *SLLI = CurDAG->getMachineNode(
1175 RISCV::SLLI, DL, VT, X,
1176 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1177 SDNode *SRLI = CurDAG->getMachineNode(
1178 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1179 CurDAG->getTargetConstant(Leading, DL, VT));
1180 ReplaceNode(Node, SRLI);
1181 return;
1182 }
1183 }
1184 }
1185
1186 // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1187 // shifted by c2 bits with c3 leading zeros.
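// For example (illustrative, RV64): (and (shl x, 8), 0xFFFF00) becomes
// (srli (slli x, 48), 40).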
1188 if (LeftShift && isShiftedMask_64(C1)) {
1189 unsigned Leading = XLen - llvm::bit_width(C1);
1190
1191 if (C2 + Leading < XLen &&
1192 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1193 // Use slli.uw when possible.
1194 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1195 SDNode *SLLI_UW =
1196 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1197 CurDAG->getTargetConstant(C2, DL, VT));
1198 ReplaceNode(Node, SLLI_UW);
1199 return;
1200 }
1201
1202 // (srli (slli x, c2+c3), c3)
1203 if (OneUseOrZExtW && !IsCANDI) {
1204 SDNode *SLLI = CurDAG->getMachineNode(
1205 RISCV::SLLI, DL, VT, X,
1206 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1207 SDNode *SRLI = CurDAG->getMachineNode(
1208 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1209 CurDAG->getTargetConstant(Leading, DL, VT));
1210 ReplaceNode(Node, SRLI);
1211 return;
1212 }
1213 }
1214 }
1215
1216 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1217 // shifted mask with c2 leading zeros and c3 trailing zeros.
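// For example (illustrative, RV64): (and (srl x, 32), 0xFFFFFFF0) becomes
// (slli (srli x, 36), 4).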
1218 if (!LeftShift && isShiftedMask_64(C1)) {
1219 unsigned Leading = XLen - llvm::bit_width(C1);
1220 unsigned Trailing = llvm::countr_zero(C1);
1221 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1222 !IsCANDI) {
1223 unsigned SrliOpc = RISCV::SRLI;
1224 // If the input is zexti32 we should use SRLIW.
1225 if (X.getOpcode() == ISD::AND &&
1226 isa<ConstantSDNode>(X.getOperand(1)) &&
1227 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1228 SrliOpc = RISCV::SRLIW;
1229 X = X.getOperand(0);
1230 }
1231 SDNode *SRLI = CurDAG->getMachineNode(
1232 SrliOpc, DL, VT, X,
1233 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1234 SDNode *SLLI = CurDAG->getMachineNode(
1235 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1236 CurDAG->getTargetConstant(Trailing, DL, VT));
1237 ReplaceNode(Node, SLLI);
1238 return;
1239 }
1240 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1241 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1242 OneUseOrZExtW && !IsCANDI) {
1243 SDNode *SRLIW = CurDAG->getMachineNode(
1244 RISCV::SRLIW, DL, VT, X,
1245 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1246 SDNode *SLLI = CurDAG->getMachineNode(
1247 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1248 CurDAG->getTargetConstant(Trailing, DL, VT));
1249 ReplaceNode(Node, SLLI);
1250 return;
1251 }
1252 }
1253
1254 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1255 // shifted mask with no leading zeros and c3 trailing zeros.
1256 if (LeftShift && isShiftedMask_64(C1)) {
1257 unsigned Leading = XLen - llvm::bit_width(C1);
1258 unsigned Trailing = llvm::countr_zero(C1);
1259 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1260 SDNode *SRLI = CurDAG->getMachineNode(
1261 RISCV::SRLI, DL, VT, X,
1262 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1263 SDNode *SLLI = CurDAG->getMachineNode(
1264 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1265 CurDAG->getTargetConstant(Trailing, DL, VT));
1266 ReplaceNode(Node, SLLI);
1267 return;
1268 }
1269 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1270 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1271 SDNode *SRLIW = CurDAG->getMachineNode(
1272 RISCV::SRLIW, DL, VT, X,
1273 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1274 SDNode *SLLI = CurDAG->getMachineNode(
1275 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1276 CurDAG->getTargetConstant(Trailing, DL, VT));
1277 ReplaceNode(Node, SLLI);
1278 return;
1279 }
1280 }
1281 }
1282
1283 // If C1 masks off the upper bits only (but can't be formed as an
1284 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1285 // available.
1286 // Transform (and x, C1)
1287 // -> (<bfextract> x, msb, lsb)
1288 if (isC1Mask && !isC1ANDI) {
1289 const unsigned Msb = llvm::bit_width(C1) - 1;
1290 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1291 return;
1292 }
1293
1294 if (tryShrinkShlLogicImm(Node))
1295 return;
1296
1297 break;
1298 }
1299 case ISD::MUL: {
1300 // Special case for calculating (mul (and X, C2), C1) where the full product
1301 // fits in XLen bits. We can shift X left by the number of leading zeros in
1302 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1303 // product has XLen trailing zeros, putting it in the output of MULHU. This
1304 // can avoid materializing a constant in a register for C2.
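// For example (illustrative, RV64): (mul (and X, 0xFFFFFFFFFF), 100) can be
// selected as (mulhu (slli X, 24), 100 << 40); the 24-bit left shift drops
// exactly the masked-off bits and the full product lands in the high half.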
1305
1306 // RHS should be a constant.
1307 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1308 if (!N1C || !N1C->hasOneUse())
1309 break;
1310
1311 // LHS should be an AND with constant.
1312 SDValue N0 = Node->getOperand(0);
1313 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1314 break;
1315
1316 uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
1317
1318 // Constant should be a mask.
1319 if (!isMask_64(C2))
1320 break;
1321
1322 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1323 // multiple users or the constant is a simm12. This prevents inserting a
1324 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1325 // make it more costly to materialize. Otherwise, using a SLLI might allow
1326 // it to be compressed.
1327 bool IsANDIOrZExt =
1328 isInt<12>(C2) ||
1329 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1330 // With XTHeadBb, we can use TH.EXTU.
1331 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1332 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1333 break;
1334 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1335 // the constant is a simm32.
1336 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1337 // With XTHeadBb, we can use TH.EXTU.
1338 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1339 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1340 break;
1341
1342 // We need to shift left the AND input and C1 by a total of XLen bits.
1343
1344 // How far left do we need to shift the AND input?
1345 unsigned XLen = Subtarget->getXLen();
1346 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1347
1348 // The constant gets shifted by the remaining amount unless that would
1349 // shift bits out.
1350 uint64_t C1 = N1C->getZExtValue();
1351 unsigned ConstantShift = XLen - LeadingZeros;
1352 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1353 break;
1354
1355 uint64_t ShiftedC1 = C1 << ConstantShift;
1356 // If this is RV32, we need to sign extend the constant.
1357 if (XLen == 32)
1358 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1359
1360 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1361 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1362 SDNode *SLLI =
1363 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1364 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1365 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1366 SDValue(SLLI, 0), SDValue(Imm, 0));
1367 ReplaceNode(Node, MULHU);
1368 return;
1369 }
1370 case ISD::LOAD: {
1371 if (tryIndexedLoad(Node))
1372 return;
1373 break;
1374 }
1375 case ISD::INTRINSIC_WO_CHAIN: {
1376 unsigned IntNo = Node->getConstantOperandVal(0);
1377 switch (IntNo) {
1378 // By default we do not custom select any intrinsic.
1379 default:
1380 break;
1381 case Intrinsic::riscv_vmsgeu:
1382 case Intrinsic::riscv_vmsge: {
1383 SDValue Src1 = Node->getOperand(1);
1384 SDValue Src2 = Node->getOperand(2);
1385 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1386 bool IsCmpUnsignedZero = false;
1387 // Only custom select scalar second operand.
1388 if (Src2.getValueType() != XLenVT)
1389 break;
1390 // Small constants are handled with patterns.
1391 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1392 int64_t CVal = C->getSExtValue();
1393 if (CVal >= -15 && CVal <= 16) {
1394 if (!IsUnsigned || CVal != 0)
1395 break;
1396 IsCmpUnsignedZero = true;
1397 }
1398 }
1399 MVT Src1VT = Src1.getSimpleValueType();
1400 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1401 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1402 default:
1403 llvm_unreachable("Unexpected LMUL!");
1404#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1405 case RISCVII::VLMUL::lmulenum: \
1406 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1407 : RISCV::PseudoVMSLT_VX_##suffix; \
1408 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1409 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1410 break;
1411 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1412 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1413 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1414 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1415 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1416 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1417 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1418#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1419 }
1420 SDValue SEW = CurDAG->getTargetConstant(
1421 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1422 SDValue VL;
1423 selectVLOp(Node->getOperand(3), VL);
1424
1425 // If vmsgeu with 0 immediate, expand it to vmset.
1426 if (IsCmpUnsignedZero) {
1427 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1428 return;
1429 }
1430
1431 // Expand to
1432 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
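// (There is no vmsge{u}.vx instruction in the V extension, so a >= x is
// computed as !(a < x).)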
1433 SDValue Cmp = SDValue(
1434 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1435 0);
1436 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1437 {Cmp, Cmp, VL, SEW}));
1438 return;
1439 }
1440 case Intrinsic::riscv_vmsgeu_mask:
1441 case Intrinsic::riscv_vmsge_mask: {
1442 SDValue Src1 = Node->getOperand(2);
1443 SDValue Src2 = Node->getOperand(3);
1444 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1445 bool IsCmpUnsignedZero = false;
1446 // Only custom select scalar second operand.
1447 if (Src2.getValueType() != XLenVT)
1448 break;
1449 // Small constants are handled with patterns.
1450 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1451 int64_t CVal = C->getSExtValue();
1452 if (CVal >= -15 && CVal <= 16) {
1453 if (!IsUnsigned || CVal != 0)
1454 break;
1455 IsCmpUnsignedZero = true;
1456 }
1457 }
1458 MVT Src1VT = Src1.getSimpleValueType();
1459 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1460 VMOROpcode;
1461 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1462 default:
1463 llvm_unreachable("Unexpected LMUL!");
1464#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1465 case RISCVII::VLMUL::lmulenum: \
1466 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1467 : RISCV::PseudoVMSLT_VX_##suffix; \
1468 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1469 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1470 break;
1471 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1472 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1473 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1474 CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1475 CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1476 CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1477 CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1478#undef CASE_VMSLT_OPCODES
1479 }
1480 // Mask operations use the LMUL from the mask type.
1481 switch (RISCVTargetLowering::getLMUL(VT)) {
1482 default:
1483 llvm_unreachable("Unexpected LMUL!");
1484#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1485 case RISCVII::VLMUL::lmulenum: \
1486 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1487 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1488 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1489 break;
1490 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1491 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1492 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1493 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1494 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1495 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1496 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1497#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1498 }
1499 SDValue SEW = CurDAG->getTargetConstant(
1500 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1501 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1502 SDValue VL;
1503 selectVLOp(Node->getOperand(5), VL);
1504 SDValue MaskedOff = Node->getOperand(1);
1505 SDValue Mask = Node->getOperand(4);
1506
1507 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1508 if (IsCmpUnsignedZero) {
1509 // We don't need vmor if the MaskedOff and the Mask are the same
1510 // value.
1511 if (Mask == MaskedOff) {
1512 ReplaceUses(Node, Mask.getNode());
1513 return;
1514 }
1515 ReplaceNode(Node,
1516 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1517 {Mask, MaskedOff, VL, MaskSEW}));
1518 return;
1519 }
1520
1521 // If the MaskedOff value and the Mask are the same value use
1522 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1523 // This avoids needing to copy v0 to vd before starting the next sequence.
1524 if (Mask == MaskedOff) {
1525 SDValue Cmp = SDValue(
1526 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1527 0);
1528 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1529 {Mask, Cmp, VL, MaskSEW}));
1530 return;
1531 }
1532
1533 // Mask needs to be copied to V0.
1534 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1535 RISCV::V0, Mask, SDValue());
1536 SDValue Glue = Chain.getValue(1);
1537 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1538
1539 // Otherwise use
1540 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1541 // The result is mask undisturbed.
1542 // We use the same instructions to emulate mask agnostic behavior, because
1543 // the agnostic result can be either undisturbed or all 1.
1544 SDValue Cmp = SDValue(
1545 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1546 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1547 0);
1548 // vmxor.mm vd, vd, v0 is used to update active value.
1549 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1550 {Cmp, Mask, VL, MaskSEW}));
1551 return;
1552 }
1553 case Intrinsic::riscv_vsetvli:
1554 case Intrinsic::riscv_vsetvlimax:
1555 return selectVSETVLI(Node);
1556 }
1557 break;
1558 }
1559 case ISD::INTRINSIC_W_CHAIN: {
1560 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1561 switch (IntNo) {
1562 // By default we do not custom select any intrinsic.
1563 default:
1564 break;
1565 case Intrinsic::riscv_vlseg2:
1566 case Intrinsic::riscv_vlseg3:
1567 case Intrinsic::riscv_vlseg4:
1568 case Intrinsic::riscv_vlseg5:
1569 case Intrinsic::riscv_vlseg6:
1570 case Intrinsic::riscv_vlseg7:
1571 case Intrinsic::riscv_vlseg8: {
1572 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1573 return;
1574 }
1575 case Intrinsic::riscv_vlseg2_mask:
1576 case Intrinsic::riscv_vlseg3_mask:
1577 case Intrinsic::riscv_vlseg4_mask:
1578 case Intrinsic::riscv_vlseg5_mask:
1579 case Intrinsic::riscv_vlseg6_mask:
1580 case Intrinsic::riscv_vlseg7_mask:
1581 case Intrinsic::riscv_vlseg8_mask: {
1582 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1583 return;
1584 }
1585 case Intrinsic::riscv_vlsseg2:
1586 case Intrinsic::riscv_vlsseg3:
1587 case Intrinsic::riscv_vlsseg4:
1588 case Intrinsic::riscv_vlsseg5:
1589 case Intrinsic::riscv_vlsseg6:
1590 case Intrinsic::riscv_vlsseg7:
1591 case Intrinsic::riscv_vlsseg8: {
1592 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1593 return;
1594 }
1595 case Intrinsic::riscv_vlsseg2_mask:
1596 case Intrinsic::riscv_vlsseg3_mask:
1597 case Intrinsic::riscv_vlsseg4_mask:
1598 case Intrinsic::riscv_vlsseg5_mask:
1599 case Intrinsic::riscv_vlsseg6_mask:
1600 case Intrinsic::riscv_vlsseg7_mask:
1601 case Intrinsic::riscv_vlsseg8_mask: {
1602 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1603 return;
1604 }
1605 case Intrinsic::riscv_vloxseg2:
1606 case Intrinsic::riscv_vloxseg3:
1607 case Intrinsic::riscv_vloxseg4:
1608 case Intrinsic::riscv_vloxseg5:
1609 case Intrinsic::riscv_vloxseg6:
1610 case Intrinsic::riscv_vloxseg7:
1611 case Intrinsic::riscv_vloxseg8:
1612 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1613 return;
1614 case Intrinsic::riscv_vluxseg2:
1615 case Intrinsic::riscv_vluxseg3:
1616 case Intrinsic::riscv_vluxseg4:
1617 case Intrinsic::riscv_vluxseg5:
1618 case Intrinsic::riscv_vluxseg6:
1619 case Intrinsic::riscv_vluxseg7:
1620 case Intrinsic::riscv_vluxseg8:
1621 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1622 return;
1623 case Intrinsic::riscv_vloxseg2_mask:
1624 case Intrinsic::riscv_vloxseg3_mask:
1625 case Intrinsic::riscv_vloxseg4_mask:
1626 case Intrinsic::riscv_vloxseg5_mask:
1627 case Intrinsic::riscv_vloxseg6_mask:
1628 case Intrinsic::riscv_vloxseg7_mask:
1629 case Intrinsic::riscv_vloxseg8_mask:
1630 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1631 return;
1632 case Intrinsic::riscv_vluxseg2_mask:
1633 case Intrinsic::riscv_vluxseg3_mask:
1634 case Intrinsic::riscv_vluxseg4_mask:
1635 case Intrinsic::riscv_vluxseg5_mask:
1636 case Intrinsic::riscv_vluxseg6_mask:
1637 case Intrinsic::riscv_vluxseg7_mask:
1638 case Intrinsic::riscv_vluxseg8_mask:
1639 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1640 return;
1641 case Intrinsic::riscv_vlseg8ff:
1642 case Intrinsic::riscv_vlseg7ff:
1643 case Intrinsic::riscv_vlseg6ff:
1644 case Intrinsic::riscv_vlseg5ff:
1645 case Intrinsic::riscv_vlseg4ff:
1646 case Intrinsic::riscv_vlseg3ff:
1647 case Intrinsic::riscv_vlseg2ff: {
1648 selectVLSEGFF(Node, /*IsMasked*/ false);
1649 return;
1650 }
1651 case Intrinsic::riscv_vlseg8ff_mask:
1652 case Intrinsic::riscv_vlseg7ff_mask:
1653 case Intrinsic::riscv_vlseg6ff_mask:
1654 case Intrinsic::riscv_vlseg5ff_mask:
1655 case Intrinsic::riscv_vlseg4ff_mask:
1656 case Intrinsic::riscv_vlseg3ff_mask:
1657 case Intrinsic::riscv_vlseg2ff_mask: {
1658 selectVLSEGFF(Node, /*IsMasked*/ true);
1659 return;
1660 }
1661 case Intrinsic::riscv_vloxei:
1662 case Intrinsic::riscv_vloxei_mask:
1663 case Intrinsic::riscv_vluxei:
1664 case Intrinsic::riscv_vluxei_mask: {
1665 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1666 IntNo == Intrinsic::riscv_vluxei_mask;
1667 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1668 IntNo == Intrinsic::riscv_vloxei_mask;
1669
1670 MVT VT = Node->getSimpleValueType(0);
1671 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1672
1673 unsigned CurOp = 2;
1674 // Masked intrinsics only have TU version pseudo instructions.
1675 bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
1676 SmallVector<SDValue, 8> Operands;
1677 if (IsTU)
1678 Operands.push_back(Node->getOperand(CurOp++));
1679 else
1680 // Skip the undef passthru operand for nomask TA version pseudo
1681 CurOp++;
1682
1683 MVT IndexVT;
1684 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1685 /*IsStridedOrIndexed*/ true, Operands,
1686 /*IsLoad=*/true, &IndexVT);
1687
1689 "Element count mismatch");
1690
1692 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1693 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1694 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1695 report_fatal_error("The V extension does not support EEW=64 for index "
1696 "values when XLEN=32");
1697 }
1698 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1699 IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1700 static_cast<unsigned>(IndexLMUL));
1701 MachineSDNode *Load =
1702 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1703
1704 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1705 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1706
1707 ReplaceNode(Node, Load);
1708 return;
1709 }
1710 case Intrinsic::riscv_vlm:
1711 case Intrinsic::riscv_vle:
1712 case Intrinsic::riscv_vle_mask:
1713 case Intrinsic::riscv_vlse:
1714 case Intrinsic::riscv_vlse_mask: {
1715 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1716 IntNo == Intrinsic::riscv_vlse_mask;
1717 bool IsStrided =
1718 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1719
1720 MVT VT = Node->getSimpleValueType(0);
1721 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1722
1723 unsigned CurOp = 2;
1724 // The riscv_vlm intrinsic is always tail agnostic and has no passthru operand.
1725 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1726 // Masked intrinsics only have TU version pseudo instructions.
1727 bool IsTU = HasPassthruOperand &&
1728 (IsMasked || !Node->getOperand(CurOp).isUndef());
1729 SmallVector<SDValue, 8> Operands;
1730 if (IsTU)
1731 Operands.push_back(Node->getOperand(CurOp++));
1732 else if (HasPassthruOperand)
1733 // Skip the undef passthru operand for nomask TA version pseudo
1734 CurOp++;
1735
1736 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1737 Operands, /*IsLoad=*/true);
1738
1739 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1740 const RISCV::VLEPseudo *P =
1741 RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
1742 static_cast<unsigned>(LMUL));
1743 MachineSDNode *Load =
1744 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1745
1746 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1747 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1748
1749 ReplaceNode(Node, Load);
1750 return;
1751 }
1752 case Intrinsic::riscv_vleff:
1753 case Intrinsic::riscv_vleff_mask: {
1754 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1755
1756 MVT VT = Node->getSimpleValueType(0);
1757 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1758
1759 unsigned CurOp = 2;
1760 // Masked intrinsics only have TU version pseudo instructions.
1761 bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
1762 SmallVector<SDValue, 8> Operands;
1763 if (IsTU)
1764 Operands.push_back(Node->getOperand(CurOp++));
1765 else
1766 // Skip the undef passthru operand for nomask TA version pseudo
1767 CurOp++;
1768
1769 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1770 /*IsStridedOrIndexed*/ false, Operands,
1771 /*IsLoad=*/true);
1772
1773 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1774 const RISCV::VLEPseudo *P =
1775 RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
1776 Log2SEW, static_cast<unsigned>(LMUL));
1777 MachineSDNode *Load = CurDAG->getMachineNode(
1778 P->Pseudo, DL, Node->getVTList(), Operands);
1779 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1780 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1781
1782 ReplaceNode(Node, Load);
1783 return;
1784 }
1785 }
1786 break;
1787 }
1788 case ISD::INTRINSIC_VOID: {
1789 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1790 switch (IntNo) {
1791 case Intrinsic::riscv_vsseg2:
1792 case Intrinsic::riscv_vsseg3:
1793 case Intrinsic::riscv_vsseg4:
1794 case Intrinsic::riscv_vsseg5:
1795 case Intrinsic::riscv_vsseg6:
1796 case Intrinsic::riscv_vsseg7:
1797 case Intrinsic::riscv_vsseg8: {
1798 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1799 return;
1800 }
1801 case Intrinsic::riscv_vsseg2_mask:
1802 case Intrinsic::riscv_vsseg3_mask:
1803 case Intrinsic::riscv_vsseg4_mask:
1804 case Intrinsic::riscv_vsseg5_mask:
1805 case Intrinsic::riscv_vsseg6_mask:
1806 case Intrinsic::riscv_vsseg7_mask:
1807 case Intrinsic::riscv_vsseg8_mask: {
1808 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1809 return;
1810 }
1811 case Intrinsic::riscv_vssseg2:
1812 case Intrinsic::riscv_vssseg3:
1813 case Intrinsic::riscv_vssseg4:
1814 case Intrinsic::riscv_vssseg5:
1815 case Intrinsic::riscv_vssseg6:
1816 case Intrinsic::riscv_vssseg7:
1817 case Intrinsic::riscv_vssseg8: {
1818 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1819 return;
1820 }
1821 case Intrinsic::riscv_vssseg2_mask:
1822 case Intrinsic::riscv_vssseg3_mask:
1823 case Intrinsic::riscv_vssseg4_mask:
1824 case Intrinsic::riscv_vssseg5_mask:
1825 case Intrinsic::riscv_vssseg6_mask:
1826 case Intrinsic::riscv_vssseg7_mask:
1827 case Intrinsic::riscv_vssseg8_mask: {
1828 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1829 return;
1830 }
1831 case Intrinsic::riscv_vsoxseg2:
1832 case Intrinsic::riscv_vsoxseg3:
1833 case Intrinsic::riscv_vsoxseg4:
1834 case Intrinsic::riscv_vsoxseg5:
1835 case Intrinsic::riscv_vsoxseg6:
1836 case Intrinsic::riscv_vsoxseg7:
1837 case Intrinsic::riscv_vsoxseg8:
1838 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1839 return;
1840 case Intrinsic::riscv_vsuxseg2:
1841 case Intrinsic::riscv_vsuxseg3:
1842 case Intrinsic::riscv_vsuxseg4:
1843 case Intrinsic::riscv_vsuxseg5:
1844 case Intrinsic::riscv_vsuxseg6:
1845 case Intrinsic::riscv_vsuxseg7:
1846 case Intrinsic::riscv_vsuxseg8:
1847 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1848 return;
1849 case Intrinsic::riscv_vsoxseg2_mask:
1850 case Intrinsic::riscv_vsoxseg3_mask:
1851 case Intrinsic::riscv_vsoxseg4_mask:
1852 case Intrinsic::riscv_vsoxseg5_mask:
1853 case Intrinsic::riscv_vsoxseg6_mask:
1854 case Intrinsic::riscv_vsoxseg7_mask:
1855 case Intrinsic::riscv_vsoxseg8_mask:
1856 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1857 return;
1858 case Intrinsic::riscv_vsuxseg2_mask:
1859 case Intrinsic::riscv_vsuxseg3_mask:
1860 case Intrinsic::riscv_vsuxseg4_mask:
1861 case Intrinsic::riscv_vsuxseg5_mask:
1862 case Intrinsic::riscv_vsuxseg6_mask:
1863 case Intrinsic::riscv_vsuxseg7_mask:
1864 case Intrinsic::riscv_vsuxseg8_mask:
1865 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1866 return;
1867 case Intrinsic::riscv_vsoxei:
1868 case Intrinsic::riscv_vsoxei_mask:
1869 case Intrinsic::riscv_vsuxei:
1870 case Intrinsic::riscv_vsuxei_mask: {
1871 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
1872 IntNo == Intrinsic::riscv_vsuxei_mask;
1873 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
1874 IntNo == Intrinsic::riscv_vsoxei_mask;
1875
1876 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1877 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1878
1879 unsigned CurOp = 2;
1880 SmallVector<SDValue, 8> Operands;
1881 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1882
1883 MVT IndexVT;
1884 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1885 /*IsStridedOrIndexed*/ true, Operands,
1886 /*IsLoad=*/false, &IndexVT);
1887
1889 "Element count mismatch");
1890
1891 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1892 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1893 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1894 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1895 report_fatal_error("The V extension does not support EEW=64 for index "
1896 "values when XLEN=32");
1897 }
1898 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
1899 IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW,
1900 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
1901 MachineSDNode *Store =
1902 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1903
1904 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1905 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1906
1907 ReplaceNode(Node, Store);
1908 return;
1909 }
1910 case Intrinsic::riscv_vsm:
1911 case Intrinsic::riscv_vse:
1912 case Intrinsic::riscv_vse_mask:
1913 case Intrinsic::riscv_vsse:
1914 case Intrinsic::riscv_vsse_mask: {
1915 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
1916 IntNo == Intrinsic::riscv_vsse_mask;
1917 bool IsStrided =
1918 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
1919
1920 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1921 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1922
1923 unsigned CurOp = 2;
1924 SmallVector<SDValue, 8> Operands;
1925 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1926
1927 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1928 Operands);
1929
1930 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1931 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
1932 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
1933 MachineSDNode *Store =
1934 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1935 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1936 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1937
1938 ReplaceNode(Node, Store);
1939 return;
1940 }
1941 }
1942 break;
1943 }
1944 case ISD::BITCAST: {
1945 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
1946 // Just drop bitcasts between vectors if both are fixed or both are
1947 // scalable.
1948 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
1949 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
1950 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
1951 CurDAG->RemoveDeadNode(Node);
1952 return;
1953 }
1954 break;
1955 }
1956 case ISD::INSERT_SUBVECTOR: {
1957 SDValue V = Node->getOperand(0);
1958 SDValue SubV = Node->getOperand(1);
1959 SDLoc DL(SubV);
1960 auto Idx = Node->getConstantOperandVal(2);
1961 MVT SubVecVT = SubV.getSimpleValueType();
1962
1963 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
1964 MVT SubVecContainerVT = SubVecVT;
1965 // Establish the correct scalable-vector types for any fixed-length type.
1966 if (SubVecVT.isFixedLengthVector())
1967 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
1968 if (VT.isFixedLengthVector())
1969 VT = TLI.getContainerForFixedLengthVector(VT);
1970
1971 const auto *TRI = Subtarget->getRegisterInfo();
1972 unsigned SubRegIdx;
1973 std::tie(SubRegIdx, Idx) =
1974 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1975 VT, SubVecContainerVT, Idx, TRI);
1976
1977 // If the Idx hasn't been completely eliminated then this is a subvector
1978 // insert which doesn't naturally align to a vector register. These must
1979 // be handled using instructions to manipulate the vector registers.
1980 if (Idx != 0)
1981 break;
1982
1983 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
1984 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
1985 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
1986 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
1987 (void)IsSubVecPartReg; // Silence unused variable warning without asserts.
1988 assert((!IsSubVecPartReg || V.isUndef()) &&
1989 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
1990 "the subvector is smaller than a full-sized register");
1991
1992 // If we haven't set a SubRegIdx, then we must be going between
1993 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
1994 if (SubRegIdx == RISCV::NoSubRegister) {
1995 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT);
1996 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
1997 InRegClassID &&
1998 "Unexpected subvector extraction");
1999 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2000 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2001 DL, VT, SubV, RC);
2002 ReplaceNode(Node, NewNode);
2003 return;
2004 }
2005
2006 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2007 ReplaceNode(Node, Insert.getNode());
2008 return;
2009 }
2010 case ISD::EXTRACT_SUBVECTOR: {
2011 SDValue V = Node->getOperand(0);
2012 auto Idx = Node->getConstantOperandVal(1);
2013 MVT InVT = V.getSimpleValueType();
2014 SDLoc DL(V);
2015
2016 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2017 MVT SubVecContainerVT = VT;
2018 // Establish the correct scalable-vector types for any fixed-length type.
2019 if (VT.isFixedLengthVector())
2020 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2021 if (InVT.isFixedLengthVector())
2022 InVT = TLI.getContainerForFixedLengthVector(InVT);
2023
2024 const auto *TRI = Subtarget->getRegisterInfo();
2025 unsigned SubRegIdx;
2026 std::tie(SubRegIdx, Idx) =
2027 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2028 InVT, SubVecContainerVT, Idx, TRI);
2029
2030 // If the Idx hasn't been completely eliminated then this is a subvector
2031 // extract which doesn't naturally align to a vector register. These must
2032 // be handled using instructions to manipulate the vector registers.
2033 if (Idx != 0)
2034 break;
2035
2036 // If we haven't set a SubRegIdx, then we must be going between
2037 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2038 if (SubRegIdx == RISCV::NoSubRegister) {
2039 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2040 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2041 InRegClassID &&
2042 "Unexpected subvector extraction");
2043 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2044 SDNode *NewNode =
2045 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2046 ReplaceNode(Node, NewNode);
2047 return;
2048 }
2049
2050 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2051 ReplaceNode(Node, Extract.getNode());
2052 return;
2053 }
2054 case RISCVISD::VMV_S_X_VL:
2055 case RISCVISD::VFMV_S_F_VL:
2056 case RISCVISD::VMV_V_X_VL:
2057 case RISCVISD::VFMV_V_F_VL: {
2058 // Only if we have optimized zero-stride vector load.
2059 if (!Subtarget->hasOptimizedZeroStrideLoad())
2060 break;
2061
2062 // Try to match splat of a scalar load to a strided load with stride of x0.
2063 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2064 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2065 if (!Node->getOperand(0).isUndef())
2066 break;
2067 SDValue Src = Node->getOperand(1);
2068 auto *Ld = dyn_cast<LoadSDNode>(Src);
2069 if (!Ld)
2070 break;
2071 EVT MemVT = Ld->getMemoryVT();
2072 // The memory VT should be the same size as the element type.
2073 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2074 break;
2075 if (!IsProfitableToFold(Src, Node, Node) ||
2076 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2077 break;
2078
2079 SDValue VL;
2080 if (IsScalarMove) {
2081 // We could deal with more VL if we update the VSETVLI insert pass to
2082 // avoid introducing more VSETVLI.
2083 if (!isOneConstant(Node->getOperand(2)))
2084 break;
2085 selectVLOp(Node->getOperand(2), VL);
2086 } else
2087 selectVLOp(Node->getOperand(2), VL);
2088
2089 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2090 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2091
2092 SDValue Operands[] = {Ld->getBasePtr(),
2093 CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW,
2094 Ld->getChain()};
2095
2096 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2097 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2098 /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
2099 Log2SEW, static_cast<unsigned>(LMUL));
2100 MachineSDNode *Load =
2101 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2102 // Update the chain.
2103 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2104 // Record the mem-refs
2105 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2106 // Replace the splat with the vlse.
2107 ReplaceNode(Node, Load);
2108 return;
2109 }
2110 }
2111
2112 // Select the default instruction.
2113 SelectCode(Node);
2114}
2115
2116 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2117 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
2118 switch (ConstraintID) {
2119 case InlineAsm::Constraint_m:
2120 // We just support simple memory operands that have a single address
2121 // operand and need no special handling.
2122 OutOps.push_back(Op);
2123 return false;
2124 case InlineAsm::Constraint_A:
2125 OutOps.push_back(Op);
2126 return false;
2127 default:
2128 break;
2129 }
2130
2131 return true;
2132}
2133
2134 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2135 SDValue &Offset) {
2136 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2137 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2138 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2139 return true;
2140 }
2141
2142 return false;
2143}
2144
2145// Select a frame index and an optional immediate offset from an ADD or OR.
2146 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2147 SDValue &Offset) {
2148 if (SelectAddrFrameIndex(Addr, Base, Offset))
2149 return true;
2150
2151 if (!CurDAG->isBaseWithConstantOffset(Addr))
2152 return false;
2153
2154 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2155 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2156 if (isInt<12>(CVal)) {
2157 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2158 Subtarget->getXLenVT());
2159 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2160 Subtarget->getXLenVT());
2161 return true;
2162 }
2163 }
2164
2165 return false;
2166}
2167
2168// Fold constant addresses.
2169static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2170 const MVT VT, const RISCVSubtarget *Subtarget,
2171 SDValue Addr, SDValue &Base, SDValue &Offset) {
2172 if (!isa<ConstantSDNode>(Addr))
2173 return false;
2174
2175 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2176
2177 // If the constant is a simm12, we can fold the whole constant and use X0 as
2178 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2179 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
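// For example, on RV64 a constant address of 0x12345800 splits into Lo12 = -2048
// and Hi = 0x12346000, so the base becomes LUI 0x12346 and the load/store offset
// folds to -2048.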
2180 int64_t Lo12 = SignExtend64<12>(CVal);
2181 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2182 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2183 if (Hi) {
2184 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2185 Base = SDValue(
2186 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2187 CurDAG->getTargetConstant(Hi20, DL, VT)),
2188 0);
2189 } else {
2190 Base = CurDAG->getRegister(RISCV::X0, VT);
2191 }
2192 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2193 return true;
2194 }
2195
2196 // Ask how constant materialization would handle this constant.
2197 RISCVMatInt::InstSeq Seq =
2198 RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits());
2199
2200 // If the last instruction would be an ADDI, we can fold its immediate and
2201 // emit the rest of the sequence as the base.
2202 if (Seq.back().getOpcode() != RISCV::ADDI)
2203 return false;
2204 Lo12 = Seq.back().getImm();
2205
2206 // Drop the last instruction.
2207 Seq.pop_back();
2208 assert(!Seq.empty() && "Expected more instructions in sequence");
2209
2210 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2211 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2212 return true;
2213}
2214
2215// Is this ADD instruction only used as the base pointer of scalar loads and
2216// stores?
2217 static bool isWorthFoldingAdd(SDValue Add) {
2218 for (auto *Use : Add->uses()) {
2219 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2220 Use->getOpcode() != ISD::ATOMIC_LOAD &&
2221 Use->getOpcode() != ISD::ATOMIC_STORE)
2222 return false;
2223 EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2224 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2225 VT != MVT::f64)
2226 return false;
2227 // Don't allow stores of the value. It must be used as the address.
2228 if (Use->getOpcode() == ISD::STORE &&
2229 cast<StoreSDNode>(Use)->getValue() == Add)
2230 return false;
2231 if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2232 cast<AtomicSDNode>(Use)->getVal() == Add)
2233 return false;
2234 }
2235
2236 return true;
2237}
2238
2239 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2240 unsigned MaxShiftAmount,
2241 SDValue &Base, SDValue &Index,
2242 SDValue &Scale) {
2243 EVT VT = Addr.getSimpleValueType();
2244 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2245 SDValue &Shift) {
2246 uint64_t ShiftAmt = 0;
2247 Index = N;
2248
2249 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2250 // Only match shifts by a value in range [0, MaxShiftAmount].
2251 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2252 Index = N.getOperand(0);
2253 ShiftAmt = N.getConstantOperandVal(1);
2254 }
2255 }
2256
2257 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2258 return ShiftAmt != 0;
2259 };
2260
2261 if (Addr.getOpcode() == ISD::ADD) {
2262 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2263 SDValue AddrB = Addr.getOperand(0);
2264 if (AddrB.getOpcode() == ISD::ADD &&
2265 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2266 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2267 isInt<12>(C1->getSExtValue())) {
2268 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2269 SDValue C1Val =
2270 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2271 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2272 AddrB.getOperand(1), C1Val),
2273 0);
2274 return true;
2275 }
2276 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2277 Base = Addr.getOperand(1);
2278 return true;
2279 } else {
2280 UnwrapShl(Addr.getOperand(1), Index, Scale);
2281 Base = Addr.getOperand(0);
2282 return true;
2283 }
2284 } else if (UnwrapShl(Addr, Index, Scale)) {
2285 EVT VT = Addr.getValueType();
2286 Base = CurDAG->getRegister(RISCV::X0, VT);
2287 return true;
2288 }
2289
2290 return false;
2291}
2292
2293 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2294 SDValue &Offset) {
2295 if (SelectAddrFrameIndex(Addr, Base, Offset))
2296 return true;
2297
2298 SDLoc DL(Addr);
2299 MVT VT = Addr.getSimpleValueType();
2300
2301 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2302 Base = Addr.getOperand(0);
2303 Offset = Addr.getOperand(1);
2304 return true;
2305 }
2306
2307 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2308 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2309 if (isInt<12>(CVal)) {
2310 Base = Addr.getOperand(0);
2311 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2312 SDValue LoOperand = Base.getOperand(1);
2313 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2314 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2315 // (its low part, really), then we can rely on the alignment of that
2316 // variable to provide a margin of safety before low part can overflow
2317 // the 12 bits of the load/store offset. Check if CVal falls within
2318 // that margin; if so (low part + CVal) can't overflow.
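// For example, a global with 8-byte alignment leaves room for CVal values of up
// to 7 before the combined low part could carry past the alignment boundary.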
2319 const DataLayout &DL = CurDAG->getDataLayout();
2320 Align Alignment = commonAlignment(
2321 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2322 if (CVal == 0 || Alignment > CVal) {
2323 int64_t CombinedOffset = CVal + GA->getOffset();
2324 Base = Base.getOperand(0);
2325 Offset = CurDAG->getTargetGlobalAddress(
2326 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2327 CombinedOffset, GA->getTargetFlags());
2328 return true;
2329 }
2330 }
2331 }
2332
2333 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2334 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2335 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2336 return true;
2337 }
2338 }
2339
2340 // Handle ADD with large immediates.
2341 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2342 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2343 assert(!isInt<12>(CVal) && "simm12 not already handled?");
2344
2345 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2346 // an ADDI for part of the offset and fold the rest into the load/store.
2347 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
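// For example, an offset of 3000 becomes an ADDI of 2047 on the base with the
// remaining 953 folded into the load/store offset.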
2348 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2349 int64_t Adj = CVal < 0 ? -2048 : 2047;
2350 Base = SDValue(
2351 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2352 CurDAG->getTargetConstant(Adj, DL, VT)),
2353 0);
2354 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2355 return true;
2356 }
2357
2358 // For larger immediates, we might be able to save one instruction from
2359 // constant materialization by folding the Lo12 bits of the immediate into
2360 // the address. We should only do this if the ADD is only used by loads and
2361 // stores that can fold the lo12 bits. Otherwise, the ADD will get selected
2362 // separately with the full materialized immediate creating extra
2363 // instructions.
2364 if (isWorthFoldingAdd(Addr) &&
2365 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2366 Offset)) {
2367 // Insert an ADD instruction with the materialized Hi52 bits.
2368 Base = SDValue(
2369 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2370 0);
2371 return true;
2372 }
2373 }
2374
2375 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2376 return true;
2377
2378 Base = Addr;
2379 Offset = CurDAG->getTargetConstant(0, DL, VT);
2380 return true;
2381}
2382
2383 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2384 SDValue &ShAmt) {
2385 ShAmt = N;
2386
2387 // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
2388 // amount. If there is an AND on the shift amount, we can bypass it if it
2389 // doesn't affect any of those bits.
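// For example, with ShiftWidth == 32, (srl X, (and Y, 31)) can use Y directly as
// the shift amount since the AND does not change the low 5 bits.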
2390 if (ShAmt.getOpcode() == ISD::AND && isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2391 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2392
2393 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2394 // mask that covers the bits needed to represent all shift amounts.
2395 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2396 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2397
2398 if (ShMask.isSubsetOf(AndMask)) {
2399 ShAmt = ShAmt.getOperand(0);
2400 } else {
2401 // SimplifyDemandedBits may have optimized the mask so try restoring any
2402 // bits that are known zero.
2403 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2404 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2405 return true;
2406 ShAmt = ShAmt.getOperand(0);
2407 }
2408 }
2409
2410 if (ShAmt.getOpcode() == ISD::ADD &&
2411 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2412 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2413 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2414 // to avoid the ADD.
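// For example, with ShiftWidth == 64, (shl X, (add Y, 64)) can shift by Y alone.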
2415 if (Imm != 0 && Imm % ShiftWidth == 0) {
2416 ShAmt = ShAmt.getOperand(0);
2417 return true;
2418 }
2419 } else if (ShAmt.getOpcode() == ISD::SUB &&
2420 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2421 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2422 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2423 // generate a NEG instead of a SUB of a constant.
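// For example, with ShiftWidth == 32, (srl X, (sub 32, Y)) can shift by (neg Y).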
2424 if (Imm != 0 && Imm % ShiftWidth == 0) {
2425 SDLoc DL(ShAmt);
2426 EVT VT = ShAmt.getValueType();
2427 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2428 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2429 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2430 ShAmt.getOperand(1));
2431 ShAmt = SDValue(Neg, 0);
2432 return true;
2433 }
2434 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2435 // to generate a NOT instead of a SUB of a constant.
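// For example, with ShiftWidth == 32, (srl X, (sub 31, Y)) can shift by (not Y).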
2436 if (Imm % ShiftWidth == ShiftWidth - 1) {
2437 SDLoc DL(ShAmt);
2438 EVT VT = ShAmt.getValueType();
2439 MachineSDNode *Not =
2440 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2441 CurDAG->getTargetConstant(-1, DL, VT));
2442 ShAmt = SDValue(Not, 0);
2443 return true;
2444 }
2445 }
2446
2447 return true;
2448}
2449
2450/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2451/// check for equality with 0. This function emits instructions that convert the
2452/// seteq/setne into something that can be compared with 0.
2453 /// The \p ExpectedCCVal argument indicates whether we are matching seteq or
2454 /// setne.
2455 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2456 SDValue &Val) {
2457 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2458 "Unexpected condition code!");
2459
2460 // We're looking for a setcc.
2461 if (N->getOpcode() != ISD::SETCC)
2462 return false;
2463
2464 // Must be an equality comparison.
2465 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2466 if (CCVal != ExpectedCCVal)
2467 return false;
2468
2469 SDValue LHS = N->getOperand(0);
2470 SDValue RHS = N->getOperand(1);
2471
2472 if (!LHS.getValueType().isInteger())
2473 return false;
2474
2475 // If the RHS is 0, we don't need any extra instructions; return the LHS.
2476 if (isNullConstant(RHS)) {
2477 Val = LHS;
2478 return true;
2479 }
2480
2481 SDLoc DL(N);
2482
2483 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2484 int64_t CVal = C->getSExtValue();
2485 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2486 // non-zero otherwise.
2487 if (CVal == -2048) {
2488 Val =
2489 SDValue(CurDAG->getMachineNode(
2490 RISCV::XORI, DL, N->getValueType(0), LHS,
2491 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2492 0);
2493 return true;
2494 }
2495 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2496 // LHS is equal to the RHS and non-zero otherwise.
2497 if (isInt<12>(CVal) || CVal == 2048) {
2498 Val =
2499 SDValue(CurDAG->getMachineNode(
2500 RISCV::ADDI, DL, N->getValueType(0), LHS,
2501 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2502 0);
2503 return true;
2504 }
2505 }
2506
2507 // If nothing else we can XOR the LHS and RHS to produce zero if they are
2508 // equal and a non-zero value if they aren't.
2509 Val = SDValue(
2510 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2511 return true;
2512}
2513
2514 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2515 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2516 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2517 Val = N.getOperand(0);
2518 return true;
2519 }
2520
2521 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2522 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2523 return N;
2524
2525 SDValue N0 = N.getOperand(0);
2526 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2527 N.getConstantOperandVal(1) == ShiftAmt &&
2528 N0.getConstantOperandVal(1) == ShiftAmt)
2529 return N0.getOperand(0);
2530
2531 return N;
2532 };
2533
2534 MVT VT = N.getSimpleValueType();
2535 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2536 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2537 return true;
2538 }
2539
2540 return false;
2541}
2542
2543 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2544 if (N.getOpcode() == ISD::AND) {
2545 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2546 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2547 Val = N.getOperand(0);
2548 return true;
2549 }
2550 }
2551 MVT VT = N.getSimpleValueType();
2552 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2553 if (CurDAG->MaskedValueIsZero(N, Mask)) {
2554 Val = N;
2555 return true;
2556 }
2557
2558 return false;
2559}
2560
2561/// Look for various patterns that can be done with a SHL that can be folded
2562/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2563/// SHXADD we are trying to match.
2564 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2565 SDValue &Val) {
2566 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2567 SDValue N0 = N.getOperand(0);
2568
2569 bool LeftShift = N0.getOpcode() == ISD::SHL;
2570 if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2571 isa<ConstantSDNode>(N0.getOperand(1))) {
2572 uint64_t Mask = N.getConstantOperandVal(1);
2573 unsigned C2 = N0.getConstantOperandVal(1);
2574
2575 unsigned XLen = Subtarget->getXLen();
2576 if (LeftShift)
2577 Mask &= maskTrailingZeros<uint64_t>(C2);
2578 else
2579 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2580
2581 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2582 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2583 // followed by a SHXADD with c3 for the X amount.
2584 if (isShiftedMask_64(Mask)) {
2585 unsigned Leading = XLen - llvm::bit_width(Mask);
2586 unsigned Trailing = llvm::countr_zero(Mask);
2587 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2588 SDLoc DL(N);
2589 EVT VT = N.getValueType();
2590 Val = SDValue(CurDAG->getMachineNode(
2591 RISCV::SRLI, DL, VT, N0.getOperand(0),
2592 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2593 0);
2594 return true;
2595 }
2596 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2597 // leading zeros and c3 trailing zeros. We can use an SRLI by C3
2598 // followed by a SHXADD using c3 for the X amount.
2599 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2600 SDLoc DL(N);
2601 EVT VT = N.getValueType();
2602 Val = SDValue(
2603 CurDAG->getMachineNode(
2604 RISCV::SRLI, DL, VT, N0.getOperand(0),
2605 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2606 0);
2607 return true;
2608 }
2609 }
2610 }
2611 }
2612
2613 bool LeftShift = N.getOpcode() == ISD::SHL;
2614 if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2615 isa<ConstantSDNode>(N.getOperand(1))) {
2616 SDValue N0 = N.getOperand(0);
2617 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2618 isa<ConstantSDNode>(N0.getOperand(1))) {
2619 uint64_t Mask = N0.getConstantOperandVal(1);
2620 if (isShiftedMask_64(Mask)) {
2621 unsigned C1 = N.getConstantOperandVal(1);
2622 unsigned XLen = Subtarget->getXLen();
2623 unsigned Leading = XLen - llvm::bit_width(Mask);
2624 unsigned Trailing = llvm::countr_zero(Mask);
2625 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2626 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2627 if (LeftShift && Leading == 32 && Trailing > 0 &&
2628 (Trailing + C1) == ShAmt) {
2629 SDLoc DL(N);
2630 EVT VT = N.getValueType();
2631 Val = SDValue(CurDAG->getMachineNode(
2632 RISCV::SRLIW, DL, VT, N0.getOperand(0),
2633 CurDAG->getTargetConstant(Trailing, DL, VT)),
2634 0);
2635 return true;
2636 }
2637 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
2638 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
2639 if (!LeftShift && Leading == 32 && Trailing > C1 &&
2640 (Trailing - C1) == ShAmt) {
2641 SDLoc DL(N);
2642 EVT VT = N.getValueType();
2643 Val = SDValue(CurDAG->getMachineNode(
2644 RISCV::SRLIW, DL, VT, N0.getOperand(0),
2645 CurDAG->getTargetConstant(Trailing, DL, VT)),
2646 0);
2647 return true;
2648 }
2649 }
2650 }
2651 }
2652
2653 return false;
2654}
2655
2656/// Look for various patterns that can be done with a SHL that can be folded
2657/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
2658/// SHXADD_UW we are trying to match.
2659 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
2660 SDValue &Val) {
2661 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
2662 N.hasOneUse()) {
2663 SDValue N0 = N.getOperand(0);
2664 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2665 N0.hasOneUse()) {
2666 uint64_t Mask = N.getConstantOperandVal(1);
2667 unsigned C2 = N0.getConstantOperandVal(1);
2668
2669 Mask &= maskTrailingZeros<uint64_t>(C2);
2670
2671 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
2672 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
2673 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
2674 if (isShiftedMask_64(Mask)) {
2675 unsigned Leading = llvm::countl_zero(Mask);
2676 unsigned Trailing = llvm::countr_zero(Mask);
2677 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
2678 SDLoc DL(N);
2679 EVT VT = N.getValueType();
2680 Val = SDValue(CurDAG->getMachineNode(
2681 RISCV::SLLI, DL, VT, N0.getOperand(0),
2682 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
2683 0);
2684 return true;
2685 }
2686 }
2687 }
2688 }
2689
2690 return false;
2691}
2692
2693// Return true if all users of this SDNode* only consume the lower \p Bits.
2694// This can be used to form W instructions for add/sub/mul/shl even when the
2695// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
2696// SimplifyDemandedBits has made it so some users see a sext_inreg and some
2697// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
2698// the add/sub/mul/shl to become non-W instructions. By checking the users we
2699// may be able to use a W instruction and CSE with the other instruction if
2700// this has happened. We could try to detect that the CSE opportunity exists
2701// before doing this, but that would be more complicated.
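// For example, an ISD::ADD whose only users are SW stores of its value only needs
// its low 32 bits, so it can be selected as ADDW and may CSE with an existing ADDW.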
2702 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
2703 const unsigned Depth) const {
2704 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
2705 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
2706 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
2707 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
2708 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
2709 isa<ConstantSDNode>(Node) || Depth != 0) &&
2710 "Unexpected opcode");
2711
2712 if (Depth >= SelectionDAG::MaxRecursionDepth)
2713 return false;
2714
2715 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
2716 SDNode *User = *UI;
2717 // Users of this node should have already been instruction selected
2718 if (!User->isMachineOpcode())
2719 return false;
2720
2721 // TODO: Add more opcodes?
2722 switch (User->getMachineOpcode()) {
2723 default:
2724 return false;
2725 case RISCV::ADDW:
2726 case RISCV::ADDIW:
2727 case RISCV::SUBW:
2728 case RISCV::MULW:
2729 case RISCV::SLLW:
2730 case RISCV::SLLIW:
2731 case RISCV::SRAW:
2732 case RISCV::SRAIW:
2733 case RISCV::SRLW:
2734 case RISCV::SRLIW:
2735 case RISCV::DIVW:
2736 case RISCV::DIVUW:
2737 case RISCV::REMW:
2738 case RISCV::REMUW:
2739 case RISCV::ROLW:
2740 case RISCV::RORW:
2741 case RISCV::RORIW:
2742 case RISCV::CLZW:
2743 case RISCV::CTZW:
2744 case RISCV::CPOPW:
2745 case RISCV::SLLI_UW:
2746 case RISCV::FMV_W_X:
2747 case RISCV::FCVT_H_W:
2748 case RISCV::FCVT_H_WU:
2749 case RISCV::FCVT_S_W:
2750 case RISCV::FCVT_S_WU:
2751 case RISCV::FCVT_D_W:
2752 case RISCV::FCVT_D_WU:
2753 case RISCV::TH_REVW:
2754 case RISCV::TH_SRRIW:
2755 if (Bits < 32)
2756 return false;
2757 break;
2758 case RISCV::SLL:
2759 case RISCV::SRA:
2760 case RISCV::SRL:
2761 case RISCV::ROL:
2762 case RISCV::ROR:
2763 case RISCV::BSET:
2764 case RISCV::BCLR:
2765 case RISCV::BINV:
2766 // Shift amount operands only use log2(Xlen) bits.
2767 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
2768 return false;
2769 break;
2770 case RISCV::SLLI:
2771 // SLLI only uses the lower (XLen - ShAmt) bits.
2772 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
2773 return false;
2774 break;
2775 case RISCV::ANDI:
2776 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
2777 break;
2778 goto RecCheck;
2779 case RISCV::ORI: {
2780 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
2781 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
2782 break;
2783 [[fallthrough]];
2784 }
2785 case RISCV::AND:
2786 case RISCV::OR:
2787 case RISCV::XOR:
2788 case RISCV::XORI:
2789 case RISCV::ANDN:
2790 case RISCV::ORN:
2791 case RISCV::XNOR:
2792 case RISCV::SH1ADD:
2793 case RISCV::SH2ADD:
2794 case RISCV::SH3ADD:
2795 RecCheck:
2796 if (hasAllNBitUsers(User, Bits, Depth + 1))
2797 break;
2798 return false;
2799 case RISCV::SRLI: {
2800 unsigned ShAmt = User->getConstantOperandVal(1);
2801 // If we are shifting right by less than Bits, and users don't demand any
2802 // bits that were shifted into [Bits-1:0], then we can consider this as an
2803 // N-Bit user.
2804 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
2805 break;
2806 return false;
2807 }
2808 case RISCV::SEXT_B:
2809 case RISCV::PACKH:
2810 if (Bits < 8)
2811 return false;
2812 break;
2813 case RISCV::SEXT_H:
2814 case RISCV::FMV_H_X:
2815 case RISCV::ZEXT_H_RV32:
2816 case RISCV::ZEXT_H_RV64:
2817 case RISCV::PACKW:
2818 if (Bits < 16)
2819 return false;
2820 break;
2821 case RISCV::PACK:
2822 if (Bits < (Subtarget->getXLen() / 2))
2823 return false;
2824 break;
2825 case RISCV::ADD_UW:
2826 case RISCV::SH1ADD_UW:
2827 case RISCV::SH2ADD_UW:
2828 case RISCV::SH3ADD_UW:
2829 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
2830 // 32 bits.
2831 if (UI.getOperandNo() != 0 || Bits < 32)
2832 return false;
2833 break;
2834 case RISCV::SB:
2835 if (UI.getOperandNo() != 0 || Bits < 8)
2836 return false;
2837 break;
2838 case RISCV::SH:
2839 if (UI.getOperandNo() != 0 || Bits < 16)
2840 return false;
2841 break;
2842 case RISCV::SW:
2843 if (UI.getOperandNo() != 0 || Bits < 32)
2844 return false;
2845 break;
2846 }
2847 }
2848
2849 return true;
2850}
2851
2852// Select a constant that can be represented as (sign_extend(imm5) << imm2).
2854 SDValue &Shl2) {
2855 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
2856 int64_t Offset = C->getSExtValue();
2857 int64_t Shift;
2858 for (Shift = 0; Shift < 4; Shift++)
2859 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
2860 break;
2861
2862 // Constant cannot be encoded.
2863 if (Shift == 4)
2864 return false;
2865
2866 EVT Ty = N->getValueType(0);
2867 Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
2868 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
2869 return true;
2870 }
2871
2872 return false;
2873}
2874
2875// Select VL as a 5 bit immediate or a value that will become a register. This
2876 // allows us to choose between VSETIVLI or VSETVLI later.
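// For example, VL = 7 is returned as a 5-bit immediate suitable for VSETIVLI,
// while VL = 64 is left as-is and will be materialized in a register for VSETVLI.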
2877 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
2878 auto *C = dyn_cast<ConstantSDNode>(N);
2879 if (C && isUInt<5>(C->getZExtValue())) {
2880 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
2881 N->getValueType(0));
2882 } else if (C && C->isAllOnes()) {
2883 // Treat all ones as VLMax.
2884 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
2885 N->getValueType(0));
2886 } else if (isa<RegisterSDNode>(N) &&
2887 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
2888 // All our VL operands use an operand that allows GPRNoX0 or an immediate
2889 // as the register class. Convert X0 to a special immediate to pass the
2890 // MachineVerifier. This is recognized specially by the vsetvli insertion
2891 // pass.
2892 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
2893 N->getValueType(0));
2894 } else {
2895 VL = N;
2896 }
2897
2898 return true;
2899}
2900
2901 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
2902 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
2903 return false;
2904 assert(N.getNumOperands() == 3 && "Unexpected number of operands");
2905 SplatVal = N.getOperand(1);
2906 return true;
2907}
2908
2909using ValidateFn = bool (*)(int64_t);
2910
2911 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
2912 SelectionDAG &DAG,
2913 const RISCVSubtarget &Subtarget,
2914 ValidateFn ValidateImm) {
2915 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
2916 !isa<ConstantSDNode>(N.getOperand(1)))
2917 return false;
2918 assert(N.getNumOperands() == 3 && "Unexpected number of operands");
2919
2920 int64_t SplatImm =
2921 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
2922
2923 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
2924 // type is wider than the resulting vector element type, an implicit
2925 // truncation first takes place. Therefore, perform a manual
2926 // truncation/sign-extension in order to ignore any truncated bits and catch
2927 // any zero-extended immediate.
2928 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
2929 // sign-extending to (XLenVT -1).
2930 MVT XLenVT = Subtarget.getXLenVT();
2931 assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
2932 "Unexpected splat operand type");
2933 MVT EltVT = N.getSimpleValueType().getVectorElementType();
2934 if (EltVT.bitsLT(XLenVT))
2935 SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());
2936
2937 if (!ValidateImm(SplatImm))
2938 return false;
2939
2940 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
2941 return true;
2942}
2943
2944 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
2945 return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
2946 [](int64_t Imm) { return isInt<5>(Imm); });
2947}
2948
2949 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
2950 return selectVSplatSimmHelper(
2951 N, SplatVal, *CurDAG, *Subtarget,
2952 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
2953}
2954
2955 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
2956 SDValue &SplatVal) {
2957 return selectVSplatSimmHelper(
2958 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
2959 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
2960 });
2961}
2962
2963 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
2964 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
2965 !isa<ConstantSDNode>(N.getOperand(1)))
2966 return false;
2967
2968 int64_t SplatImm =
2969 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
2970
2971 if (!isUInt<5>(SplatImm))
2972 return false;
2973
2974 SplatVal =
2975 CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());
2976
2977 return true;
2978}
2979
2981 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
2982 if (!CFP)
2983 return false;
2984 const APFloat &APF = CFP->getValueAPF();
2985 // td can handle +0.0 already.
2986 if (APF.isPosZero())
2987 return false;
2988
2989 MVT VT = CFP->getSimpleValueType(0);
2990
2991 if (static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
2992 VT) >= 0)
2993 return false;
2994
2995 MVT XLenVT = Subtarget->getXLenVT();
2996 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
2997 assert(APF.isNegZero() && "Unexpected constant.");
2998 return false;
2999 }
3000 SDLoc DL(N);
3001 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3002 *Subtarget);
3003 return true;
3004}
3005
3006 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3007 SDValue &Imm) {
3008 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3009 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3010
3011 if (!isInt<5>(ImmVal))
3012 return false;
3013
3014 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3015 return true;
3016 }
3017
3018 return false;
3019}
3020
3021// Try to remove sext.w if the input is a W instruction or can be made into
3022// a W instruction cheaply.
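// For example, (ADDIW (ADDW a, b), 0) is replaced by (ADDW a, b), and
// (ADDIW (ADD a, b), 0) is rewritten to (ADDW a, b).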
3023bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3024 // Look for the sext.w pattern, addiw rd, rs1, 0.
3025 if (N->getMachineOpcode() != RISCV::ADDIW ||
3026 !isNullConstant(N->getOperand(1)))
3027 return false;
3028
3029 SDValue N0 = N->getOperand(0);
3030 if (!N0.isMachineOpcode())
3031 return false;
3032
3033 switch (N0.getMachineOpcode()) {
3034 default:
3035 break;
3036 case RISCV::ADD:
3037 case RISCV::ADDI:
3038 case RISCV::SUB:
3039 case RISCV::MUL:
3040 case RISCV::SLLI: {
3041 // Convert sext.w+add/sub/mul to their W instructions. This will create
3042 // a new independent instruction. This improves latency.
3043 unsigned Opc;
3044 switch (N0.getMachineOpcode()) {
3045 default:
3046 llvm_unreachable("Unexpected opcode!");
3047 case RISCV::ADD: Opc = RISCV::ADDW; break;
3048 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3049 case RISCV::SUB: Opc = RISCV::SUBW; break;
3050 case RISCV::MUL: Opc = RISCV::MULW; break;
3051 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3052 }
3053
3054 SDValue N00 = N0.getOperand(0);
3055 SDValue N01 = N0.getOperand(1);
3056
3057 // Shift amount needs to be uimm5.
3058 if (N0.getMachineOpcode() == RISCV::SLLI &&
3059 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3060 break;
3061
3062 SDNode *Result =
3063 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3064 N00, N01);
3065 ReplaceUses(N, Result);
3066 return true;
3067 }
3068 case RISCV::ADDW:
3069 case RISCV::ADDIW:
3070 case RISCV::SUBW:
3071 case RISCV::MULW:
3072 case RISCV::SLLIW:
3073 case RISCV::PACKW:
3074 case RISCV::TH_MULAW:
3075 case RISCV::TH_MULAH:
3076 case RISCV::TH_MULSW:
3077 case RISCV::TH_MULSH:
3078 // Result is already sign extended; just remove the sext.w.
3079 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3080 ReplaceUses(N, N0.getNode());
3081 return true;
3082 }
3083
3084 return false;
3085}
3086
3087// Return true if we can make sure mask of N is all-ones mask.
3088static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3089 // Check that we're using V0 as a mask register.
3090 if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
3091 cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
3092 return false;
3093
3094 // The glued user defines V0.
3095 const auto *Glued = N->getGluedNode();
3096
3097 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3098 return false;
3099
3100 // Check that we're defining V0 as a mask register.
3101 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3102 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3103 return false;
3104
3105 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3106 SDValue MaskSetter = Glued->getOperand(2);
3107
3108 const auto IsVMSet = [](unsigned Opc) {
3109 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3110 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3111 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3112 Opc == RISCV::PseudoVMSET_M_B8;
3113 };
3114
3115 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3116 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3117 // assume that it's all-ones? Same applies to its VL.
3118 return MaskSetter->isMachineOpcode() &&
3119 IsVMSet(MaskSetter.getMachineOpcode());
3120}
3121
3122// Optimize masked RVV pseudo instructions with a known all-ones mask to their
3123// corresponding "unmasked" pseudo versions. The mask we're interested in will
3124// take the form of a V0 physical register operand, with a glued
3125// register-setting instruction.
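// For example, a PseudoVADD_VV_M1_MASK whose V0 operand is defined by a VMSET
// pseudo can be replaced by the unmasked PseudoVADD_VV_M1 form.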
3126bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
3127 const RISCV::RISCVMaskedPseudoInfo *I =
3128 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3129 if (!I)
3130 return false;
3131
3132 unsigned MaskOpIdx = I->MaskOpIdx;
3133
3134 if (!usesAllOnesMask(N, MaskOpIdx))
3135 return false;
3136
3137 // Retrieve the tail policy operand index, if any.
3138 std::optional<unsigned> TailPolicyOpIdx;
3139 const RISCVInstrInfo &TII = *Subtarget->getInstrInfo();
3140 const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode());
3141
3142 bool IsTA = true;
3143 if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
3144 TailPolicyOpIdx = getVecPolicyOpIdx(N, MaskedMCID);
3145 if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
3146 RISCVII::TAIL_AGNOSTIC)) {
3147 // Keep the true-masked instruction when there is no unmasked TU
3148 // instruction
3149 if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef())
3150 return false;
3151 // We can't use TA if the tie-operand is not IMPLICIT_DEF
3152 if (!N->getOperand(0).isUndef())
3153 IsTA = false;
3154 }
3155 }
3156
3157 unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo;
3158
3159 // Check that we're dropping the mask operand and any policy operand
3160 // when we transform to this unmasked pseudo. Additionally, if this instruction
3161 // is tail agnostic, the unmasked instruction should not have a merge op.
3163 assert((IsTA != RISCVII::hasMergeOp(TSFlags)) &&
3166 "Unexpected pseudo to transform to");
3167 (void)TSFlags;
3168
3169 SmallVector<SDValue, 8> Ops;
3170 // Skip the merge operand at index 0 if IsTA
3171 for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) {
3172 // Skip the mask, the policy, and the Glue.
3173 SDValue Op = N->getOperand(I);
3174 if (I == MaskOpIdx || I == TailPolicyOpIdx ||
3175 Op.getValueType() == MVT::Glue)
3176 continue;
3177 Ops.push_back(Op);
3178 }
3179
3180 // Transitively apply any node glued to our new node.
3181 const auto *Glued = N->getGluedNode();
3182 if (auto *TGlued = Glued->getGluedNode())
3183 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3184
3185 SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3186 Result->setFlags(N->getFlags());
3187 ReplaceUses(N, Result);
3188
3189 return true;
3190}
3191
3192// Try to fold away VMERGE_VVM instructions. We handle these cases:
3193 // -Masked TU VMERGE_VVM combined with an unmasked TA instruction folds to a
3194 //  masked TU instruction. The VMERGE_VVM's merge operand must be the same as
3195 //  its false operand.
3196 // -Masked TA VMERGE_VVM combined with an unmasked TA instruction folds to a
3197 //  masked TA instruction.
3198 // -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to a
3199 //  masked TU instruction. Both instructions must have the same merge operand,
3200 //  and the VMERGE_VVM's merge operand must be the same as its false operand.
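// For example, a PseudoVMERGE_VVM_M1 whose true operand is a single-use unmasked
// PseudoVADD_VV_M1 folds into PseudoVADD_VV_M1_MASK, reusing the vmerge's false
// operand as the merge operand and keeping its mask and VL.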
3201bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) {
3202 unsigned Offset = IsTA ? 0 : 1;
3203 uint64_t Policy = IsTA ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;
3204
3205 SDValue False = N->getOperand(0 + Offset);
3206 SDValue True = N->getOperand(1 + Offset);
3207 SDValue Mask = N->getOperand(2 + Offset);
3208 SDValue VL = N->getOperand(3 + Offset);
3209
3210 assert(True.getResNo() == 0 &&
3211 "Expect True is the first output of an instruction.");
3212
3213 // N must be the only user of True.
3214 if (!True.hasOneUse())
3215 return false;
3216
3217 if (!True.isMachineOpcode())
3218 return false;
3219
3220 unsigned TrueOpc = True.getMachineOpcode();
3221
3222 // Skip if True has merge operand.
3223 uint64_t TrueTSFlags = TII->get(TrueOpc).TSFlags;
3224 bool HasMergeOp = RISCVII::hasMergeOp(TrueTSFlags);
3225
3226 if (HasMergeOp) {
3227 // The vmerge instruction must be TU.
3228 if (IsTA)
3229 return false;
3230 SDValue MergeOpN = N->getOperand(0);
3231 SDValue MergeOpTrue = True->getOperand(0);
3232 // Both the vmerge instruction and the True instruction must have the same
3233 // merge operand. The vmerge instruction must have an all 1s mask since
3234 // we're going to keep the mask from the True instruction.
3235 // FIXME: Support mask agnostic True instruction which would have an
3236 // undef merge operand.
3237 if (MergeOpN != MergeOpTrue || !usesAllOnesMask(N, /* MaskOpIdx */ 3))
3238 return false;
3239 }
3240
3241 // Skip if True has side effect.
3242 // TODO: Support vleff and vlsegff.
3243 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3244 return false;
3245
3246 const RISCV::RISCVMaskedPseudoInfo *Info =
3247 HasMergeOp ? RISCV::getMaskedPseudoInfo(TrueOpc)
3248 : RISCV::lookupMaskedIntrinsicByUnmaskedTA(TrueOpc);
3249
3250 if (!Info)
3251 return false;
3252
3253 // The last operand of a masked instruction may be glued.
3254 bool HasGlueOp = True->getGluedNode() != nullptr;
3255
3256 // The chain operand may exist either before the glued operands or in the last
3257 // position.
3258 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3259 bool HasChainOp =
3260 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3261
3262 if (HasChainOp) {
3263 // Avoid creating cycles in the DAG. We must ensure that none of the other
3264 // operands depend on True through its Chain.
3265 SmallVector<const SDNode *, 4> LoopWorklist;
3266 SmallPtrSet<const SDNode *, 16> Visited;
3267 LoopWorklist.push_back(False.getNode());
3268 LoopWorklist.push_back(Mask.getNode());
3269 LoopWorklist.push_back(VL.getNode());
3270 if (SDNode *Glued = N->getGluedNode())
3271 LoopWorklist.push_back(Glued);
3272 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3273 return false;
3274 }
3275
3276 // The vector policy operand may be present for masked intrinsics
3277 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3278 unsigned TrueVLIndex =
3279 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3280 SDValue TrueVL = True.getOperand(TrueVLIndex);
3281
3282 auto IsNoFPExcept = [this](SDValue N) {
3283 return !this->mayRaiseFPException(N.getNode()) ||
3284 N->getFlags().hasNoFPExcept();
3285 };
3286
3287 // Allow the peephole for non-exception True with VLMAX vector length, since
3288 // all the values after VL of N are dependent on Merge. VLMAX should be
3289 // lowered to (XLenVT -1).
3290 if (TrueVL != VL && !(IsNoFPExcept(True) && isAllOnesConstant(TrueVL)))
3291 return false;
3292
3293 SDLoc DL(N);
3294 unsigned MaskedOpc = Info->MaskedPseudo;
3296 "Expected instructions with mask have policy operand.");
3297 assert(RISCVII::hasMergeOp(TII->get(MaskedOpc).TSFlags) &&
3298 "Expected instructions with mask have merge operand.");
3299
3300 SmallVector<SDValue, 8> Ops;
3301 if (HasMergeOp) {
3302 Ops.append(True->op_begin(), True->op_begin() + TrueVLIndex);
3303 Ops.append({VL, /* SEW */ True.getOperand(TrueVLIndex + 1)});
3304 Ops.push_back(
3305 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT()));
3306 Ops.append(True->op_begin() + TrueVLIndex + 3, True->op_end());
3307 } else {
3308 Ops.push_back(False);
3309 Ops.append(True->op_begin(), True->op_begin() + TrueVLIndex);
3310 Ops.append({Mask, VL, /* SEW */ True.getOperand(TrueVLIndex + 1)});
3311 Ops.push_back(
3312 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT()));
3313
3314 // Result node should have chain operand of True.
3315 if (HasChainOp)
3316 Ops.push_back(True.getOperand(TrueChainOpIdx));
3317
3318 if (N->getGluedNode())
3319 Ops.push_back(N->getOperand(N->getNumOperands() - 1));
3320 }
3321
3322 SDNode *Result =
3323 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3324 Result->setFlags(True->getFlags());
3325
3326 // Replace vmerge.vvm node by Result.
3327 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3328
3329 // Replace another value of True. E.g. chain and VL.
3330 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3331 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3332
3333 // Try to transform Result to unmasked intrinsic.
3334 doPeepholeMaskedRVV(Result);
3335 return true;
3336}
3337
3338// Transform (VMERGE_VVM_<LMUL>_TU false, false, true, allones, vl, sew) to
3339// (VADD_VI_<LMUL>_TU false, true, 0, vl, sew). It may decrease uses of VMSET.
3340bool RISCVDAGToDAGISel::performVMergeToVAdd(SDNode *N) {
3341 unsigned NewOpc;
3342 switch (N->getMachineOpcode()) {
3343 default:
3344 llvm_unreachable("Expected VMERGE_VVM_<LMUL>_TU instruction.");
3345 case RISCV::PseudoVMERGE_VVM_MF8_TU:
3346 NewOpc = RISCV::PseudoVADD_VI_MF8_TU;
3347 break;
3348 case RISCV::PseudoVMERGE_VVM_MF4_TU:
3349 NewOpc = RISCV::PseudoVADD_VI_MF4_TU;
3350 break;
3351 case RISCV::PseudoVMERGE_VVM_MF2_TU:
3352 NewOpc = RISCV::PseudoVADD_VI_MF2_TU;
3353 break;
3354 case RISCV::PseudoVMERGE_VVM_M1_TU:
3355 NewOpc = RISCV::PseudoVADD_VI_M1_TU;
3356 break;
3357 case RISCV::PseudoVMERGE_VVM_M2_TU:
3358 NewOpc = RISCV::PseudoVADD_VI_M2_TU;
3359 break;
3360 case RISCV::PseudoVMERGE_VVM_M4_TU:
3361 NewOpc = RISCV::PseudoVADD_VI_M4_TU;
3362 break;
3363 case RISCV::PseudoVMERGE_VVM_M8_TU:
3364 NewOpc = RISCV::PseudoVADD_VI_M8_TU;
3365 break;
3366 }
3367
3368 if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
3369 return false;
3370
3371 SDLoc DL(N);
3372 EVT VT = N->getValueType(0);
3373 SDValue Ops[] = {N->getOperand(1), N->getOperand(2),
3374 CurDAG->getTargetConstant(0, DL, Subtarget->getXLenVT()),
3375 N->getOperand(4), N->getOperand(5)};
3376 SDNode *Result = CurDAG->getMachineNode(NewOpc, DL, VT, Ops);
3377 ReplaceUses(N, Result);
3378 return true;
3379}
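// Editor's note (illustrative sketch, not part of the upstream source): with
// an all-ones mask the tail-undisturbed vmerge above simply copies its true
// operand over the passthru, which an add of immediate 0 does as well:
//
//   %r = PseudoVMERGE_VVM_M1_TU %passthru, %passthru, %v, %allones, %avl, sew
//     -->
//   %r = PseudoVADD_VI_M1_TU %passthru, %v, 0, %avl, sew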
3380
3381bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3382 bool MadeChange = false;
3383 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3384
3385 while (Position != CurDAG->allnodes_begin()) {
3386 SDNode *N = &*--Position;
3387 if (N->use_empty() || !N->isMachineOpcode())
3388 continue;
3389
3390 auto IsVMergeTU = [](unsigned Opcode) {
3391 return Opcode == RISCV::PseudoVMERGE_VVM_MF8_TU ||
3392 Opcode == RISCV::PseudoVMERGE_VVM_MF4_TU ||
3393 Opcode == RISCV::PseudoVMERGE_VVM_MF2_TU ||
3394 Opcode == RISCV::PseudoVMERGE_VVM_M1_TU ||
3395 Opcode == RISCV::PseudoVMERGE_VVM_M2_TU ||
3396 Opcode == RISCV::PseudoVMERGE_VVM_M4_TU ||
3397 Opcode == RISCV::PseudoVMERGE_VVM_M8_TU;
3398 };
3399
3400 auto IsVMergeTA = [](unsigned Opcode) {
3401 return Opcode == RISCV::PseudoVMERGE_VVM_MF8 ||
3402 Opcode == RISCV::PseudoVMERGE_VVM_MF4 ||
3403 Opcode == RISCV::PseudoVMERGE_VVM_MF2 ||
3404 Opcode == RISCV::PseudoVMERGE_VVM_M1 ||
3405 Opcode == RISCV::PseudoVMERGE_VVM_M2 ||
3406 Opcode == RISCV::PseudoVMERGE_VVM_M4 ||
3407 Opcode == RISCV::PseudoVMERGE_VVM_M8;
3408 };
3409
3410 unsigned Opc = N->getMachineOpcode();
3411 // The following optimizations require that the merge operand of N is the
3412 // same as the false operand of N.
3413 if ((IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1)) ||
3414 IsVMergeTA(Opc))
3415 MadeChange |= performCombineVMergeAndVOps(N, IsVMergeTA(Opc));
3416 if (IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1))
3417 MadeChange |= performVMergeToVAdd(N);
3418 }
3419 return MadeChange;
3420}
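// Editor's note (illustrative): the driver above only considers the
// tail-undisturbed vmerge pseudos when their merge and false operands are the
// same value, since both rewrites keep a single passthru/false operand, while
// the tail-agnostic forms (which have no merge operand) are always candidates
// for the vmerge-and-op fold.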
3421
3422// This pass converts a legalized DAG into a RISCV-specific DAG, ready
3423// for instruction scheduling.
3424 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
3425 CodeGenOpt::Level OptLevel) {
3426 return new RISCVDAGToDAGISel(TM, OptLevel);
3427}
3428
3429char RISCVDAGToDAGISel::ID = 0;
3430