1//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the RISC-V target.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVISelDAGToDAG.h"
17#include "RISCVISelLowering.h"
18#include "RISCVInstrInfo.h"
22#include "llvm/IR/IntrinsicsRISCV.h"
24#include "llvm/Support/Debug.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "riscv-isel"
31#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
32
34 "riscv-use-rematerializable-movimm", cl::Hidden,
35 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
36 "constant materialization"),
37 cl::init(false));
38
39#define GET_DAGISEL_BODY RISCVDAGToDAGISel
40#include "RISCVGenDAGISel.inc"
41
42void RISCVDAGToDAGISel::PreprocessISelDAG() {
43 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
44
45 bool MadeChange = false;
46 while (Position != CurDAG->allnodes_begin()) {
47 SDNode *N = &*--Position;
48 if (N->use_empty())
49 continue;
50
51 SDValue Result;
52 switch (N->getOpcode()) {
53 case ISD::SPLAT_VECTOR: {
54 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
55 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
56 MVT VT = N->getSimpleValueType(0);
57 unsigned Opc =
58 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
59 SDLoc DL(N);
60 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
61 SDValue Src = N->getOperand(0);
62 if (VT.isInteger())
63 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
64 N->getOperand(0));
65 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
66 break;
67 }
68 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
69 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
70 // load. Done after lowering and combining so that we have a chance to
71 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
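// For illustration (example registers, not from the source): on RV32 this
// expands to two 32-bit stores into an 8-byte stack slot followed by a
// stride-0 vector load that broadcasts the reassembled 64-bit value, roughly:
//   sw       lo, 0(slot)
//   sw       hi, 4(slot)
//   vlse64.v v8, (slot), zero   ; x0 stride replays the same 8 bytes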
72 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
73 MVT VT = N->getSimpleValueType(0);
74 SDValue Passthru = N->getOperand(0);
75 SDValue Lo = N->getOperand(1);
76 SDValue Hi = N->getOperand(2);
77 SDValue VL = N->getOperand(3);
78 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
79 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
80 "Unexpected VTs!");
81 MachineFunction &MF = CurDAG->getMachineFunction();
82 SDLoc DL(N);
83
84 // Create temporary stack for each expanding node.
85 SDValue StackSlot =
86 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
87 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
88 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
89
90 SDValue Chain = CurDAG->getEntryNode();
91 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
92
93 SDValue OffsetSlot =
94 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
95 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
96 Align(8));
97
98 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
99
100 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
101 SDValue IntID =
102 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
103 SDValue Ops[] = {Chain,
104 IntID,
105 Passthru,
106 StackSlot,
107 CurDAG->getRegister(RISCV::X0, MVT::i64),
108 VL};
109
110 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
111 MVT::i64, MPI, Align(8),
112 MachineMemOperand::MOLoad);
113 break;
114 }
115 case ISD::FP_EXTEND: {
116 // We only have vector patterns for riscv_fpextend_vl in isel.
117 SDLoc DL(N);
118 MVT VT = N->getSimpleValueType(0);
119 if (!VT.isVector())
120 break;
121 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
122 SDValue TrueMask = CurDAG->getNode(
123 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
124 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
125 TrueMask, VLMAX);
126 break;
127 }
128 }
129
130 if (Result) {
131 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
132 LLVM_DEBUG(N->dump(CurDAG));
133 LLVM_DEBUG(dbgs() << "\nNew: ");
134 LLVM_DEBUG(Result->dump(CurDAG));
135 LLVM_DEBUG(dbgs() << "\n");
136
137 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
138 MadeChange = true;
139 }
140 }
141
142 if (MadeChange)
143 CurDAG->RemoveDeadNodes();
144}
145
146void RISCVDAGToDAGISel::PostprocessISelDAG() {
147 HandleSDNode Dummy(CurDAG->getRoot());
148 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
149
150 bool MadeChange = false;
151 while (Position != CurDAG->allnodes_begin()) {
152 SDNode *N = &*--Position;
153 // Skip dead nodes and any non-machine opcodes.
154 if (N->use_empty() || !N->isMachineOpcode())
155 continue;
156
157 MadeChange |= doPeepholeSExtW(N);
158
159 // FIXME: This is here only because the VMerge transform doesn't
160 // know how to handle masked true inputs. Once that has been moved
161 // to post-ISEL, this can be deleted as well.
162 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
163 }
164
165 CurDAG->setRoot(Dummy.getValue());
166
167 // After we're done with everything else, convert IMPLICIT_DEF
168 // passthru operands to NoRegister. This is required to workaround
169 // an optimization deficiency in MachineCSE. This really should
170 // be merged back into each of the patterns (i.e. there's no good
171 // reason not to go directly to NoReg), but is being done this way
172 // to allow easy backporting.
173 MadeChange |= doPeepholeNoRegPassThru();
174
175 if (MadeChange)
176 CurDAG->RemoveDeadNodes();
177}
178
179static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
180 RISCVMatInt::InstSeq &Seq) {
181 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
182 for (const RISCVMatInt::Inst &Inst : Seq) {
183 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
184 SDNode *Result = nullptr;
185 switch (Inst.getOpndKind()) {
186 case RISCVMatInt::Imm:
187 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
188 break;
189 case RISCVMatInt::RegX0:
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
191 CurDAG->getRegister(RISCV::X0, VT));
192 break;
193 case RISCVMatInt::RegReg:
194 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
195 break;
196 case RISCVMatInt::RegImm:
197 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
198 break;
199 }
200
201 // Only the first instruction has X0 as its source.
202 SrcReg = SDValue(Result, 0);
203 }
204
205 return SrcReg;
206}
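// For illustration: for Imm = 0x12345678 on RV64, selectImmSeq above receives
// the two-entry sequence {LUI 0x12345, ADDIW 0x678} and chains it as
//   lui   t, 0x12345
//   addiw t, t, 0x678
// with SrcReg starting at X0 and then tracking each partial result.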
207
208static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
209 int64_t Imm, const RISCVSubtarget &Subtarget) {
210 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
211
212 // Use a rematerializable pseudo instruction for short sequences if enabled.
213 if (Seq.size() == 2 && UsePseudoMovImm)
214 return SDValue(
215 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
216 CurDAG->getSignedTargetConstant(Imm, DL, VT)),
217 0);
218
219 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
220 // worst an LUI+ADDIW. This will require an extra register, but avoids a
221 // constant pool.
222 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
223 // low and high 32 bits are the same and bit 31 and 63 are set.
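// For illustration (values chosen by hand): with Zba, Imm =
// 0xABCDABCDABCDABCD has equal 32-bit halves with bits 31/63 set, so SeqLo
// materializes sext32(0xABCDABCD) in two instructions (LUI+ADDIW) and the
// full value is rebuilt as
//   slli   t2, t1, 32
//   add.uw t1, t1, t2
// for four instructions total, when the single-register sequence would be longer.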
224 if (Seq.size() > 3) {
225 unsigned ShiftAmt, AddOpc;
226 RISCVMatInt::InstSeq SeqLo =
227 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
228 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
229 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
230
231 SDValue SLLI = SDValue(
232 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
233 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
234 0);
235 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
236 }
237 }
238
239 // Otherwise, use the original sequence.
240 return selectImmSeq(CurDAG, DL, VT, Seq);
241}
242
243void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
244 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
245 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
246 bool IsLoad, MVT *IndexVT) {
247 SDValue Chain = Node->getOperand(0);
248
249 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
250
251 if (IsStridedOrIndexed) {
252 Operands.push_back(Node->getOperand(CurOp++)); // Index.
253 if (IndexVT)
254 *IndexVT = Operands.back()->getSimpleValueType(0);
255 }
256
257 if (IsMasked) {
258 SDValue Mask = Node->getOperand(CurOp++);
259 Operands.push_back(Mask);
260 }
261 SDValue VL;
262 selectVLOp(Node->getOperand(CurOp++), VL);
263 Operands.push_back(VL);
264
265 MVT XLenVT = Subtarget->getXLenVT();
266 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
267 Operands.push_back(SEWOp);
268
269 // At the IR layer, all the masked load intrinsics have policy operands,
270 // none of the others do. All have passthru operands. For our pseudos,
271 // all loads have policy operands.
272 if (IsLoad) {
273 uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
274 if (IsMasked)
275 Policy = Node->getConstantOperandVal(CurOp++);
276 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
277 Operands.push_back(PolicyOp);
278 }
279
280 Operands.push_back(Chain); // Chain.
281}
282
283void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
284 bool IsStrided) {
285 SDLoc DL(Node);
286 MVT VT = Node->getSimpleValueType(0);
287 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
288 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
289
290 unsigned CurOp = 2;
291 SmallVector<SDValue, 8> Operands;
292
293 Operands.push_back(Node->getOperand(CurOp++));
294
295 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
296 Operands, /*IsLoad=*/true);
297
298 const RISCV::VLSEGPseudo *P =
299 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
300 static_cast<unsigned>(LMUL));
301 MachineSDNode *Load =
302 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
303
304 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
305
306 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
307 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
308 CurDAG->RemoveDeadNode(Node);
309}
310
311void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
312 bool IsMasked) {
313 SDLoc DL(Node);
314 MVT VT = Node->getSimpleValueType(0);
315 MVT XLenVT = Subtarget->getXLenVT();
316 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
317 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
318
319 unsigned CurOp = 2;
320 SmallVector<SDValue, 8> Operands;
321
322 Operands.push_back(Node->getOperand(CurOp++));
323
324 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
325 /*IsStridedOrIndexed*/ false, Operands,
326 /*IsLoad=*/true);
327
328 const RISCV::VLSEGPseudo *P =
329 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
330 Log2SEW, static_cast<unsigned>(LMUL));
331 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
332 XLenVT, MVT::Other, Operands);
333
334 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
335
336 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
337 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
338 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
339 CurDAG->RemoveDeadNode(Node);
340}
341
342void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
343 bool IsOrdered) {
344 SDLoc DL(Node);
345 MVT VT = Node->getSimpleValueType(0);
346 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
347 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
348
349 unsigned CurOp = 2;
350 SmallVector<SDValue, 8> Operands;
351
352 Operands.push_back(Node->getOperand(CurOp++));
353
354 MVT IndexVT;
355 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
356 /*IsStridedOrIndexed*/ true, Operands,
357 /*IsLoad=*/true, &IndexVT);
358
359#ifndef NDEBUG
360 // Number of elements = RVVBitsPerBlock * LMUL / SEW
361 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
362 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
363 if (DecodedLMUL.second)
364 ContainedTyNumElts /= DecodedLMUL.first;
365 else
366 ContainedTyNumElts *= DecodedLMUL.first;
367 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
368 "Element count mismatch");
369#endif
370
371 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
372 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
373 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
374 report_fatal_error("The V extension does not support EEW=64 for index "
375 "values when XLEN=32");
376 }
377 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
378 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
379 static_cast<unsigned>(IndexLMUL));
380 MachineSDNode *Load =
381 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
382
383 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
384
385 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
386 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
387 CurDAG->RemoveDeadNode(Node);
388}
389
390void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
391 bool IsStrided) {
392 SDLoc DL(Node);
393 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
394 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
395 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
396
397 unsigned CurOp = 2;
398 SmallVector<SDValue, 8> Operands;
399
400 Operands.push_back(Node->getOperand(CurOp++));
401
402 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
403 Operands);
404
405 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
406 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
407 MachineSDNode *Store =
408 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
409
410 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
411
412 ReplaceNode(Node, Store);
413}
414
415void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
416 bool IsOrdered) {
417 SDLoc DL(Node);
418 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
419 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
420 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
421
422 unsigned CurOp = 2;
423 SmallVector<SDValue, 8> Operands;
424
425 Operands.push_back(Node->getOperand(CurOp++));
426
427 MVT IndexVT;
428 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
429 /*IsStridedOrIndexed*/ true, Operands,
430 /*IsLoad=*/false, &IndexVT);
431
432#ifndef NDEBUG
433 // Number of elements = RVVBitsPerBlock * LMUL / SEW
434 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
435 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
436 if (DecodedLMUL.second)
437 ContainedTyNumElts /= DecodedLMUL.first;
438 else
439 ContainedTyNumElts *= DecodedLMUL.first;
440 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
441 "Element count mismatch");
442#endif
443
444 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
445 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
446 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
447 report_fatal_error("The V extension does not support EEW=64 for index "
448 "values when XLEN=32");
449 }
450 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
451 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
452 static_cast<unsigned>(IndexLMUL));
453 MachineSDNode *Store =
454 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
455
456 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
457
458 ReplaceNode(Node, Store);
459}
460
461void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
462 if (!Subtarget->hasVInstructions())
463 return;
464
465 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
466
467 SDLoc DL(Node);
468 MVT XLenVT = Subtarget->getXLenVT();
469
470 unsigned IntNo = Node->getConstantOperandVal(0);
471
472 assert((IntNo == Intrinsic::riscv_vsetvli ||
473 IntNo == Intrinsic::riscv_vsetvlimax) &&
474 "Unexpected vsetvli intrinsic");
475
476 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
477 unsigned Offset = (VLMax ? 1 : 2);
478
479 assert(Node->getNumOperands() == Offset + 2 &&
480 "Unexpected number of operands");
481
482 unsigned SEW =
483 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
484 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
485 Node->getConstantOperandVal(Offset + 1) & 0x7);
486
487 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
488 /*MaskAgnostic*/ true);
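// For illustration: SEW=32, LMUL=1 with tail/mask agnostic encodes as
// vtype = (vma << 7) | (vta << 6) | (vsew << 3) | vlmul
//       = 0x80 | 0x40 | (2 << 3) | 0 = 0xD0,
// following the vtype layout in the V spec (vsew = log2(SEW/8)).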
489 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
490
491 SDValue VLOperand;
492 unsigned Opcode = RISCV::PseudoVSETVLI;
493 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
494 if (auto VLEN = Subtarget->getRealVLen())
495 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
496 VLMax = true;
497 }
498 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
499 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
500 Opcode = RISCV::PseudoVSETVLIX0;
501 } else {
502 VLOperand = Node->getOperand(1);
503
504 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
505 uint64_t AVL = C->getZExtValue();
506 if (isUInt<5>(AVL)) {
507 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
508 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
509 XLenVT, VLImm, VTypeIOp));
510 return;
511 }
512 }
513 }
514
515 ReplaceNode(Node,
516 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
517}
518
520 if (!Subtarget->hasVendorXSfmmbase())
521 return;
522
523 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
524
525 SDLoc DL(Node);
526 MVT XLenVT = Subtarget->getXLenVT();
527
528 unsigned IntNo = Node->getConstantOperandVal(0);
529
530 assert((IntNo == Intrinsic::riscv_sf_vsettnt ||
531 IntNo == Intrinsic::riscv_sf_vsettm ||
532 IntNo == Intrinsic::riscv_sf_vsettk) &&
533 "Unexpected XSfmm vset intrinsic");
534
535 unsigned SEW = RISCVVType::decodeVSEW(Node->getConstantOperandVal(2));
536 unsigned Widen = RISCVVType::decodeTWiden(Node->getConstantOperandVal(3));
537 unsigned PseudoOpCode =
538 IntNo == Intrinsic::riscv_sf_vsettnt ? RISCV::PseudoSF_VSETTNT
539 : IntNo == Intrinsic::riscv_sf_vsettm ? RISCV::PseudoSF_VSETTM
540 : RISCV::PseudoSF_VSETTK;
541
542 if (IntNo == Intrinsic::riscv_sf_vsettnt) {
543 unsigned VTypeI = RISCVVType::encodeXSfmmVType(SEW, Widen, 0);
544 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
545
546 ReplaceNode(Node, CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
547 Node->getOperand(1), VTypeIOp));
548 } else {
549 SDValue Log2SEW = CurDAG->getTargetConstant(Log2_32(SEW), DL, XLenVT);
550 SDValue TWiden = CurDAG->getTargetConstant(Widen, DL, XLenVT);
551 ReplaceNode(Node,
552 CurDAG->getMachineNode(PseudoOpCode, DL, XLenVT,
553 Node->getOperand(1), Log2SEW, TWiden));
554 }
555}
556
557bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
558 MVT VT = Node->getSimpleValueType(0);
559 unsigned Opcode = Node->getOpcode();
560 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
561 "Unexpected opcode");
562 SDLoc DL(Node);
563
564 // For operations of the form (x << C1) op C2, check if we can use
565 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
566 SDValue N0 = Node->getOperand(0);
567 SDValue N1 = Node->getOperand(1);
568
569 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
570 if (!Cst)
571 return false;
572
573 int64_t Val = Cst->getSExtValue();
574
575 // Check if immediate can already use ANDI/ORI/XORI.
576 if (isInt<12>(Val))
577 return false;
578
579 SDValue Shift = N0;
580
581 // If Val is simm32 and we have a sext_inreg from i32, then the binop
582 // produces at least 33 sign bits. We can peek through the sext_inreg and use
583 // a SLLIW at the end.
584 bool SignExt = false;
585 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
586 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
587 SignExt = true;
588 Shift = N0.getOperand(0);
589 }
590
591 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
592 return false;
593
594 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
595 if (!ShlCst)
596 return false;
597
598 uint64_t ShAmt = ShlCst->getZExtValue();
599
600 // Make sure that we don't change the operation by removing bits.
601 // This only matters for OR and XOR, AND is unaffected.
602 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
603 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
604 return false;
605
606 int64_t ShiftedVal = Val >> ShAmt;
607 if (!isInt<12>(ShiftedVal))
608 return false;
609
610 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
611 if (SignExt && ShAmt >= 32)
612 return false;
613
614 // Ok, we can reorder to get a smaller immediate.
615 unsigned BinOpc;
616 switch (Opcode) {
617 default: llvm_unreachable("Unexpected opcode");
618 case ISD::AND: BinOpc = RISCV::ANDI; break;
619 case ISD::OR: BinOpc = RISCV::ORI; break;
620 case ISD::XOR: BinOpc = RISCV::XORI; break;
621 }
622
623 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
624
625 SDNode *BinOp = CurDAG->getMachineNode(
626 BinOpc, DL, VT, Shift.getOperand(0),
627 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
628 SDNode *SLLI =
629 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
630 CurDAG->getTargetConstant(ShAmt, DL, VT));
631 ReplaceNode(Node, SLLI);
632 return true;
633}
634
635bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
636 unsigned Opc;
637
638 if (Subtarget->hasVendorXTHeadBb())
639 Opc = RISCV::TH_EXT;
640 else if (Subtarget->hasVendorXAndesPerf())
641 Opc = RISCV::NDS_BFOS;
642 else if (Subtarget->hasVendorXqcibm())
643 Opc = RISCV::QC_EXT;
644 else
645 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
646 return false;
647
648 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
649 if (!N1C)
650 return false;
651
652 SDValue N0 = Node->getOperand(0);
653 if (!N0.hasOneUse())
654 return false;
655
656 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
657 const SDLoc &DL, MVT VT) {
658 if (Opc == RISCV::QC_EXT) {
659 // QC.EXT X, width, shamt
660 // shamt is the same as Lsb
661 // width is the number of bits to extract from the Lsb
662 Msb = Msb - Lsb + 1;
663 }
664 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
665 CurDAG->getTargetConstant(Msb, DL, VT),
666 CurDAG->getTargetConstant(Lsb, DL, VT));
667 };
668
669 SDLoc DL(Node);
670 MVT VT = Node->getSimpleValueType(0);
671 const unsigned RightShAmt = N1C->getZExtValue();
672
673 // Transform (sra (shl X, C1) C2) with C1 < C2
674 // -> (SignedBitfieldExtract X, msb, lsb)
675 if (N0.getOpcode() == ISD::SHL) {
676 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
677 if (!N01C)
678 return false;
679
680 const unsigned LeftShAmt = N01C->getZExtValue();
681 // Make sure that this is a bitfield extraction (i.e., the shift-right
682 // amount can not be less than the left-shift).
683 if (LeftShAmt > RightShAmt)
684 return false;
685
686 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
687 const unsigned Msb = MsbPlusOne - 1;
688 const unsigned Lsb = RightShAmt - LeftShAmt;
689
690 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
691 ReplaceNode(Node, Sbe);
692 return true;
693 }
694
695 // Transform (sra (sext_inreg X, _), C) ->
696 // (SignedBitfieldExtract X, msb, lsb)
697 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
698 unsigned ExtSize =
699 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
700
701 // ExtSize of 32 should use sraiw via tablegen pattern.
702 if (ExtSize == 32)
703 return false;
704
705 const unsigned Msb = ExtSize - 1;
706 // If the shift-right amount is greater than Msb, the operation extracts
707 // the X[Msb] bit and sign-extends it.
708 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;
709
710 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
711 ReplaceNode(Node, Sbe);
712 return true;
713 }
714
715 return false;
716}
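// For illustration: on RV32 with Xqcibm, (sra (shl X, 24), 28) is a signed
// extract of X[7:4]: MsbPlusOne = 32 - 24 = 8, Msb = 7, Lsb = 28 - 24 = 4,
// which the lambda above rewrites to QC.EXT X, 4, 4 (width = 7 - 4 + 1,
// shamt = 4).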
717
718bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
719 // Only supported with XAndesPerf at the moment.
720 if (!Subtarget->hasVendorXAndesPerf())
721 return false;
722
723 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
724 if (!N1C)
725 return false;
726
727 SDValue N0 = Node->getOperand(0);
728 if (!N0.hasOneUse())
729 return false;
730
731 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
732 const SDLoc &DL, MVT VT) {
733 unsigned Opc = RISCV::NDS_BFOS;
734 // If the Lsb is equal to the Msb, then the Lsb should be 0.
735 if (Lsb == Msb)
736 Lsb = 0;
737 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
738 CurDAG->getTargetConstant(Lsb, DL, VT),
739 CurDAG->getTargetConstant(Msb, DL, VT));
740 };
741
742 SDLoc DL(Node);
743 MVT VT = Node->getSimpleValueType(0);
744 const unsigned RightShAmt = N1C->getZExtValue();
745
746 // Transform (sra (shl X, C1) C2) with C1 > C2
747 // -> (NDS.BFOS X, lsb, msb)
748 if (N0.getOpcode() == ISD::SHL) {
749 auto *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
750 if (!N01C)
751 return false;
752
753 const unsigned LeftShAmt = N01C->getZExtValue();
754 // Make sure that this is a bitfield insertion (i.e., the shift-right
755 // amount should be less than the left-shift).
756 if (LeftShAmt <= RightShAmt)
757 return false;
758
759 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
760 const unsigned Msb = MsbPlusOne - 1;
761 const unsigned Lsb = LeftShAmt - RightShAmt;
762
763 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
764 ReplaceNode(Node, Sbi);
765 return true;
766 }
767
768 return false;
769}
770
771bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
772 const SDLoc &DL, MVT VT,
773 SDValue X, unsigned Msb,
774 unsigned Lsb) {
775 unsigned Opc;
776
777 if (Subtarget->hasVendorXTHeadBb()) {
778 Opc = RISCV::TH_EXTU;
779 } else if (Subtarget->hasVendorXAndesPerf()) {
780 Opc = RISCV::NDS_BFOZ;
781 } else if (Subtarget->hasVendorXqcibm()) {
782 Opc = RISCV::QC_EXTU;
783 // QC.EXTU X, width, shamt
784 // shamt is the same as Lsb
785 // width is the number of bits to extract from the Lsb
786 Msb = Msb - Lsb + 1;
787 } else {
788 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
789 return false;
790 }
791
792 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
793 CurDAG->getTargetConstant(Msb, DL, VT),
794 CurDAG->getTargetConstant(Lsb, DL, VT));
795 ReplaceNode(Node, Ube);
796 return true;
797}
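// For illustration: extracting the unsigned field X[15:8] (Msb = 15, Lsb = 8)
// through tryUnsignedBitfieldExtract above becomes TH.EXTU X, 15, 8 on
// XTHeadBb, NDS.BFOZ X, 15, 8 on XAndesPerf, or QC.EXTU X, 8, 8 on Xqcibm
// (width = 15 - 8 + 1 = 8, shamt = 8), using the operand order built above.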
798
799bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
800 const SDLoc &DL, MVT VT,
801 SDValue X, unsigned Msb,
802 unsigned Lsb) {
803 // Only supported with XAndesPerf at the moment.
804 if (!Subtarget->hasVendorXAndesPerf())
805 return false;
806
807 unsigned Opc = RISCV::NDS_BFOZ;
808
809 // If the Lsb is equal to the Msb, then the Lsb should be 0.
810 if (Lsb == Msb)
811 Lsb = 0;
812 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
813 CurDAG->getTargetConstant(Lsb, DL, VT),
814 CurDAG->getTargetConstant(Msb, DL, VT));
815 ReplaceNode(Node, Ubi);
816 return true;
817}
818
819bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
820 // Target does not support indexed loads.
821 if (!Subtarget->hasVendorXTHeadMemIdx())
822 return false;
823
824 LoadSDNode *Ld = cast<LoadSDNode>(Node);
825 ISD::MemIndexedMode AM = Ld->getAddressingMode();
826 if (AM == ISD::UNINDEXED)
827 return false;
828
829 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
830 if (!C)
831 return false;
832
833 EVT LoadVT = Ld->getMemoryVT();
834 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
835 "Unexpected addressing mode");
836 bool IsPre = AM == ISD::PRE_INC;
837 bool IsPost = AM == ISD::POST_INC;
838 int64_t Offset = C->getSExtValue();
839
840 // The constants that can be encoded in the THeadMemIdx instructions
841 // are of the form (sign_extend(imm5) << imm2).
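// For illustration: Offset = 64 encodes as imm5 = 8, imm2 = 3 (8 << 3 = 64),
// while Offset = 17 cannot be encoded: no shift in [0,3] makes it both a
// multiple of 1 << shift and small enough for a signed 5-bit field.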
842 unsigned Shift;
843 for (Shift = 0; Shift < 4; Shift++)
844 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
845 break;
846
847 // Constant cannot be encoded.
848 if (Shift == 4)
849 return false;
850
851 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
852 unsigned Opcode;
853 if (LoadVT == MVT::i8 && IsPre)
854 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
855 else if (LoadVT == MVT::i8 && IsPost)
856 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
857 else if (LoadVT == MVT::i16 && IsPre)
858 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
859 else if (LoadVT == MVT::i16 && IsPost)
860 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
861 else if (LoadVT == MVT::i32 && IsPre)
862 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
863 else if (LoadVT == MVT::i32 && IsPost)
864 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
865 else if (LoadVT == MVT::i64 && IsPre)
866 Opcode = RISCV::TH_LDIB;
867 else if (LoadVT == MVT::i64 && IsPost)
868 Opcode = RISCV::TH_LDIA;
869 else
870 return false;
871
872 EVT Ty = Ld->getOffset().getValueType();
873 SDValue Ops[] = {
874 Ld->getBasePtr(),
875 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
876 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
877 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
878 Ld->getValueType(1), MVT::Other, Ops);
879
880 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
881 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
882
883 ReplaceNode(Node, New);
884
885 return true;
886}
887
888static Register getTileReg(uint64_t TileNum) {
889 assert(TileNum <= 15 && "Invalid tile number");
890 return RISCV::T0 + TileNum;
891}
892
893void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
894 if (!Subtarget->hasVInstructions())
895 return;
896
897 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
898
899 SDLoc DL(Node);
900 unsigned IntNo = Node->getConstantOperandVal(1);
901
902 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
903 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
904 "Unexpected vsetvli intrinsic");
905
906 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
907 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
908 SDValue SEWOp =
909 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
910 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
911 Node->getOperand(4), Node->getOperand(5),
912 Node->getOperand(8), SEWOp,
913 Node->getOperand(0)};
914
915 unsigned Opcode;
916 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
917 switch (LMulSDNode->getSExtValue()) {
918 case 5:
919 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
920 : RISCV::PseudoSF_VC_I_SE_MF8;
921 break;
922 case 6:
923 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
924 : RISCV::PseudoSF_VC_I_SE_MF4;
925 break;
926 case 7:
927 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
928 : RISCV::PseudoSF_VC_I_SE_MF2;
929 break;
930 case 0:
931 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
932 : RISCV::PseudoSF_VC_I_SE_M1;
933 break;
934 case 1:
935 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
936 : RISCV::PseudoSF_VC_I_SE_M2;
937 break;
938 case 2:
939 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
940 : RISCV::PseudoSF_VC_I_SE_M4;
941 break;
942 case 3:
943 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
944 : RISCV::PseudoSF_VC_I_SE_M8;
945 break;
946 }
947
948 ReplaceNode(Node, CurDAG->getMachineNode(
949 Opcode, DL, Node->getSimpleValueType(0), Operands));
950}
951
952static unsigned getSegInstNF(unsigned Intrinsic) {
953#define INST_NF_CASE(NAME, NF) \
954 case Intrinsic::riscv_##NAME##NF: \
955 return NF;
956#define INST_NF_CASE_MASK(NAME, NF) \
957 case Intrinsic::riscv_##NAME##NF##_mask: \
958 return NF;
959#define INST_NF_CASE_FF(NAME, NF) \
960 case Intrinsic::riscv_##NAME##NF##ff: \
961 return NF;
962#define INST_NF_CASE_FF_MASK(NAME, NF) \
963 case Intrinsic::riscv_##NAME##NF##ff_mask: \
964 return NF;
965#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
966 MACRO_NAME(NAME, 2) \
967 MACRO_NAME(NAME, 3) \
968 MACRO_NAME(NAME, 4) \
969 MACRO_NAME(NAME, 5) \
970 MACRO_NAME(NAME, 6) \
971 MACRO_NAME(NAME, 7) \
972 MACRO_NAME(NAME, 8)
973#define INST_ALL_NF_CASE(NAME) \
974 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
975 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
976#define INST_ALL_NF_CASE_WITH_FF(NAME) \
977 INST_ALL_NF_CASE(NAME) \
978 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
979 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
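// For illustration, INST_ALL_NF_CASE(vlsseg) expands to the fourteen labels
//   case Intrinsic::riscv_vlsseg2:      return 2;
//   ...
//   case Intrinsic::riscv_vlsseg8_mask: return 8;
// covering NF = 2..8 for both the unmasked and masked intrinsics.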
980 switch (Intrinsic) {
981 default:
982 llvm_unreachable("Unexpected segment load/store intrinsic");
983 INST_ALL_NF_CASE_WITH_FF(vlseg)
984 INST_ALL_NF_CASE(vlsseg)
985 INST_ALL_NF_CASE(vloxseg)
986 INST_ALL_NF_CASE(vluxseg)
987 INST_ALL_NF_CASE(vsseg)
988 INST_ALL_NF_CASE(vssseg)
989 INST_ALL_NF_CASE(vsoxseg)
990 INST_ALL_NF_CASE(vsuxseg)
991 }
992}
993
994void RISCVDAGToDAGISel::Select(SDNode *Node) {
995 // If we have a custom node, we have already selected.
996 if (Node->isMachineOpcode()) {
997 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
998 Node->setNodeId(-1);
999 return;
1000 }
1001
1002 // Instruction Selection not handled by the auto-generated tablegen selection
1003 // should be handled here.
1004 unsigned Opcode = Node->getOpcode();
1005 MVT XLenVT = Subtarget->getXLenVT();
1006 SDLoc DL(Node);
1007 MVT VT = Node->getSimpleValueType(0);
1008
1009 bool HasBitTest = Subtarget->hasBEXTILike();
1010
1011 switch (Opcode) {
1012 case ISD::Constant: {
1013 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
1014 auto *ConstNode = cast<ConstantSDNode>(Node);
1015 if (ConstNode->isZero()) {
1016 SDValue New =
1017 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1018 ReplaceNode(Node, New.getNode());
1019 return;
1020 }
1021 int64_t Imm = ConstNode->getSExtValue();
1022 // If only the lower 8 bits are used, try to convert this to a simm6 by
1023 // sign-extending bit 7. This is neutral without the C extension, and
1024 // allows C.LI to be used if C is present.
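// For illustration: Imm = 0xF8 with only byte-width users becomes
// SignExtend64<8>(0xF8) = -8, which fits simm6 and is thus eligible for c.li,
// whereas 248 itself would need a full 12-bit addi.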
1025 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
1026 Imm = SignExtend64<8>(Imm);
1027 // If the upper XLen-16 bits are not used, try to convert this to a simm12
1028 // by sign extending bit 15.
1029 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
1030 hasAllHUsers(Node))
1031 Imm = SignExtend64<16>(Imm);
1032 // If the upper 32-bits are not used try to convert this into a simm32 by
1033 // sign extending bit 32.
1034 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1035 Imm = SignExtend64<32>(Imm);
1036
1037 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1038 return;
1039 }
1040 case ISD::ConstantFP: {
1041 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1042
1043 bool Is64Bit = Subtarget->is64Bit();
1044 bool HasZdinx = Subtarget->hasStdExtZdinx();
1045
1046 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1047 SDValue Imm;
1048 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1049 // create an integer immediate.
1050 if (APF.isPosZero() || NegZeroF64) {
1051 if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1052 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1053 else
1054 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1055 } else {
1056 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1057 *Subtarget);
1058 }
1059
1060 unsigned Opc;
1061 switch (VT.SimpleTy) {
1062 default:
1063 llvm_unreachable("Unexpected size");
1064 case MVT::bf16:
1065 assert(Subtarget->hasStdExtZfbfmin());
1066 Opc = RISCV::FMV_H_X;
1067 break;
1068 case MVT::f16:
1069 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1070 break;
1071 case MVT::f32:
1072 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1073 break;
1074 case MVT::f64:
1075 // For RV32, we can't move from a GPR, we need to convert instead. This
1076 // should only happen for +0.0 and -0.0.
1077 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1078 if (HasZdinx)
1079 Opc = RISCV::COPY;
1080 else
1081 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1082 break;
1083 }
1084
1085 SDNode *Res;
1086 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1087 Res =
1088 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1089 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1090 Res =
1091 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1092 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1093 Res = CurDAG->getMachineNode(
1094 Opc, DL, VT, Imm,
1095 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1096 else
1097 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1098
1099 // For f64 -0.0, we need to insert a fneg.d idiom.
1100 if (NegZeroF64) {
1101 Opc = RISCV::FSGNJN_D;
1102 if (HasZdinx)
1103 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1104 Res =
1105 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1106 }
1107
1108 ReplaceNode(Node, Res);
1109 return;
1110 }
1111 case RISCVISD::BuildGPRPair:
1112 case RISCVISD::BuildPairF64: {
1113 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1114 break;
1115
1116 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1117 "BuildPairF64 only handled here on rv32i_zdinx");
1118
1119 SDValue Ops[] = {
1120 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1121 Node->getOperand(0),
1122 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1123 Node->getOperand(1),
1124 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1125
1126 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
1127 ReplaceNode(Node, N);
1128 return;
1129 }
1130 case RISCVISD::SplitGPRPair:
1131 case RISCVISD::SplitF64: {
1132 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1133 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1134 "SplitF64 only handled here on rv32i_zdinx");
1135
1136 if (!SDValue(Node, 0).use_empty()) {
1137 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1138 Node->getValueType(0),
1139 Node->getOperand(0));
1140 ReplaceUses(SDValue(Node, 0), Lo);
1141 }
1142
1143 if (!SDValue(Node, 1).use_empty()) {
1144 SDValue Hi = CurDAG->getTargetExtractSubreg(
1145 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1146 ReplaceUses(SDValue(Node, 1), Hi);
1147 }
1148
1149 CurDAG->RemoveDeadNode(Node);
1150 return;
1151 }
1152
1153 assert(Opcode != RISCVISD::SplitGPRPair &&
1154 "SplitGPRPair should already be handled");
1155
1156 if (!Subtarget->hasStdExtZfa())
1157 break;
1158 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1159 "Unexpected subtarget");
1160
1161 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1162 if (!SDValue(Node, 0).use_empty()) {
1163 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1164 Node->getOperand(0));
1165 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1166 }
1167 if (!SDValue(Node, 1).use_empty()) {
1168 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1169 Node->getOperand(0));
1170 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1171 }
1172
1173 CurDAG->RemoveDeadNode(Node);
1174 return;
1175 }
1176 case ISD::SHL: {
1177 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1178 if (!N1C)
1179 break;
1180 SDValue N0 = Node->getOperand(0);
1181 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1182 !isa<ConstantSDNode>(N0.getOperand(1)))
1183 break;
1184 unsigned ShAmt = N1C->getZExtValue();
1185 uint64_t Mask = N0.getConstantOperandVal(1);
1186
1187 if (isShiftedMask_64(Mask)) {
1188 unsigned XLen = Subtarget->getXLen();
1189 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1190 unsigned TrailingZeros = llvm::countr_zero(Mask);
1191 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1192 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1193 // where C2 has 32 leading zeros and C3 trailing zeros.
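// For illustration: C2 = 0x00000000FFFFFF00 (32 leading, 8 trailing zeros)
// and C = 4 give
//   srliw t, X, 8
//   slli  t, t, 12        ; C3 + C = 8 + 4
// (register name chosen for illustration).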
1194 SDNode *SRLIW = CurDAG->getMachineNode(
1195 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1196 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1197 SDNode *SLLI = CurDAG->getMachineNode(
1198 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1199 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1200 ReplaceNode(Node, SLLI);
1201 return;
1202 }
1203 if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1204 XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1205 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1206 // where C2 has C4 leading zeros and no trailing zeros.
1207 // This is profitable if the "and" was to be lowered to
1208 // (srli (slli X, C4), C4) and not (andi X, C2).
1209 // For "LeadingZeros == 32":
1210 // - with Zba it's just (slli.uw X, C)
1211 // - without Zba a tablegen pattern applies the very same
1212 // transform as we would have done here
1213 SDNode *SLLI = CurDAG->getMachineNode(
1214 RISCV::SLLI, DL, VT, N0.getOperand(0),
1215 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1216 SDNode *SRLI = CurDAG->getMachineNode(
1217 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1218 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1219 ReplaceNode(Node, SRLI);
1220 return;
1221 }
1222 }
1223 break;
1224 }
1225 case ISD::SRL: {
1226 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1227 if (!N1C)
1228 break;
1229 SDValue N0 = Node->getOperand(0);
1230 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1231 break;
1232 unsigned ShAmt = N1C->getZExtValue();
1233 uint64_t Mask = N0.getConstantOperandVal(1);
1234
1235 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1236 // 32 leading zeros and C3 trailing zeros.
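// For illustration: with the same C2 = 0x00000000FFFFFF00 and C = 4 this
// selects
//   srliw t, X, 8
//   slli  t, t, 4         ; C3 - C = 8 - 4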
1237 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1238 unsigned XLen = Subtarget->getXLen();
1239 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1240 unsigned TrailingZeros = llvm::countr_zero(Mask);
1241 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1242 SDNode *SRLIW = CurDAG->getMachineNode(
1243 RISCV::SRLIW, DL, VT, N0.getOperand(0),
1244 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1245 SDNode *SLLI = CurDAG->getMachineNode(
1246 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1247 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1248 ReplaceNode(Node, SLLI);
1249 return;
1250 }
1251 }
1252
1253 // Optimize (srl (and X, C2), C) ->
1254 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1255 // Where C2 is a mask with C3 trailing ones.
1256 // Taking into account that the C2 may have had lower bits unset by
1257 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1258 // This pattern occurs when type legalizing right shifts for types with
1259 // less than XLen bits.
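// For illustration: on RV64, (srl (and X, 0xFFF8), 3) (an i16 shift after
// type legalization) gives TrailingOnes = 16 and, absent a bitfield-extract
// instruction, is selected as
//   slli t, X, 48         ; XLen - C3 = 64 - 16
//   srli t, t, 51         ; (XLen - C3) + C = 48 + 3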
1260 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1261 if (!isMask_64(Mask))
1262 break;
1263 unsigned TrailingOnes = llvm::countr_one(Mask);
1264 if (ShAmt >= TrailingOnes)
1265 break;
1266 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1267 if (TrailingOnes == 32) {
1268 SDNode *SRLI = CurDAG->getMachineNode(
1269 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1270 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1271 ReplaceNode(Node, SRLI);
1272 return;
1273 }
1274
1275 // Only do the remaining transforms if the AND has one use.
1276 if (!N0.hasOneUse())
1277 break;
1278
1279 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1280 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1281 SDNode *BEXTI = CurDAG->getMachineNode(
1282 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1283 N0.getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1284 ReplaceNode(Node, BEXTI);
1285 return;
1286 }
1287
1288 const unsigned Msb = TrailingOnes - 1;
1289 const unsigned Lsb = ShAmt;
1290 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0.getOperand(0), Msb, Lsb))
1291 return;
1292
1293 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1294 SDNode *SLLI =
1295 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1296 CurDAG->getTargetConstant(LShAmt, DL, VT));
1297 SDNode *SRLI = CurDAG->getMachineNode(
1298 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1299 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1300 ReplaceNode(Node, SRLI);
1301 return;
1302 }
1303 case ISD::SRA: {
1304 if (trySignedBitfieldExtract(Node))
1305 return;
1306
1307 if (trySignedBitfieldInsertInSign(Node))
1308 return;
1309
1310 // Optimize (sra (sext_inreg X, i16), C) ->
1311 // (srai (slli X, (XLen-16), (XLen-16) + C)
1312 // And (sra (sext_inreg X, i8), C) ->
1313 // (srai (slli X, (XLen-8), (XLen-8) + C)
1314 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1315 // This transform matches the code we get without Zbb. The shifts are more
1316 // compressible, and this can help expose CSE opportunities in the sdiv by
1317 // constant optimization.
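// For illustration: on RV64, (sra (sext_inreg X, i8), 3) becomes
//   slli t, X, 56         ; XLen - 8
//   srai t, t, 59         ; (XLen - 8) + 3
// matching the expansion used when Zbb's sext.b is unavailable.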
1318 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1319 if (!N1C)
1320 break;
1321 SDValue N0 = Node->getOperand(0);
1322 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1323 break;
1324 unsigned ShAmt = N1C->getZExtValue();
1325 unsigned ExtSize =
1326 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1327 // ExtSize of 32 should use sraiw via tablegen pattern.
1328 if (ExtSize >= 32 || ShAmt >= ExtSize)
1329 break;
1330 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1331 SDNode *SLLI =
1332 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1333 CurDAG->getTargetConstant(LShAmt, DL, VT));
1334 SDNode *SRAI = CurDAG->getMachineNode(
1335 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1336 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1337 ReplaceNode(Node, SRAI);
1338 return;
1339 }
1340 case ISD::OR: {
1341 if (tryShrinkShlLogicImm(Node))
1342 return;
1343
1344 break;
1345 }
1346 case ISD::XOR:
1347 if (tryShrinkShlLogicImm(Node))
1348 return;
1349
1350 break;
1351 case ISD::AND: {
1352 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1353 if (!N1C)
1354 break;
1355
1356 SDValue N0 = Node->getOperand(0);
1357
1358 bool LeftShift = N0.getOpcode() == ISD::SHL;
1359 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1360 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1361 if (!C)
1362 break;
1363 unsigned C2 = C->getZExtValue();
1364 unsigned XLen = Subtarget->getXLen();
1365 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1366
1367 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1368 // shift pair might offer more compression opportunities.
1369 // TODO: We could check for C extension here, but we don't have many lit
1370 // tests with the C extension enabled so not checking gets better
1371 // coverage.
1372 // TODO: What if ANDI is faster than the shift?
1373 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1374
1375 uint64_t C1 = N1C->getZExtValue();
1376
1377 // Clear irrelevant bits in the mask.
1378 if (LeftShift)
1379 C1 &= maskTrailingZeros<uint64_t>(C2);
1380 else
1381 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1382
1383 // Some transforms should only be done if the shift has a single use or
1384 // the AND would become (srli (slli X, 32), 32)
1385 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1386
1387 SDValue X = N0.getOperand(0);
1388
1389 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1390 // with c3 leading zeros.
1391 if (!LeftShift && isMask_64(C1)) {
1392 unsigned Leading = XLen - llvm::bit_width(C1);
1393 if (C2 < Leading) {
1394 // If the number of leading zeros is C2+32 this can be SRLIW.
1395 if (C2 + 32 == Leading) {
1396 SDNode *SRLIW = CurDAG->getMachineNode(
1397 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1398 ReplaceNode(Node, SRLIW);
1399 return;
1400 }
1401
1402 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1403 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1404 //
1405 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1406 // legalized and goes through DAG combine.
1407 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1408 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1409 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1410 SDNode *SRAIW =
1411 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1412 CurDAG->getTargetConstant(31, DL, VT));
1413 SDNode *SRLIW = CurDAG->getMachineNode(
1414 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1415 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1416 ReplaceNode(Node, SRLIW);
1417 return;
1418 }
1419
1420 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1421 // available.
1422 // Transform (and (srl x, C2), C1)
1423 // -> (<bfextract> x, msb, lsb)
1424 //
1425 // Make sure to keep this below the SRLIW cases, as we always want to
1426 // prefer the more common instruction.
1427 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1428 const unsigned Lsb = C2;
1429 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1430 return;
1431
1432 // (srli (slli x, c3-c2), c3).
1433 // Skip if we could use (zext.w (sraiw X, C2)).
1434 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1435 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1436 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1437 // Also Skip if we can use bexti or th.tst.
1438 Skip |= HasBitTest && Leading == XLen - 1;
1439 if (OneUseOrZExtW && !Skip) {
1440 SDNode *SLLI = CurDAG->getMachineNode(
1441 RISCV::SLLI, DL, VT, X,
1442 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1443 SDNode *SRLI = CurDAG->getMachineNode(
1444 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1445 CurDAG->getTargetConstant(Leading, DL, VT));
1446 ReplaceNode(Node, SRLI);
1447 return;
1448 }
1449 }
1450 }
1451
1452 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1453 // shifted by c2 bits with c3 leading zeros.
1454 if (LeftShift && isShiftedMask_64(C1)) {
1455 unsigned Leading = XLen - llvm::bit_width(C1);
1456
1457 if (C2 + Leading < XLen &&
1458 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1459 // Use slli.uw when possible.
1460 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1461 SDNode *SLLI_UW =
1462 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1463 CurDAG->getTargetConstant(C2, DL, VT));
1464 ReplaceNode(Node, SLLI_UW);
1465 return;
1466 }
1467
1468 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1469 // available.
1470 // Transform (and (shl x, c2), c1)
1471 // -> (<bfinsert> x, msb, lsb)
1472 // e.g.
1473 // (and (shl x, 12), 0x00fff000)
1474 // If XLen = 32 and C2 = 12, then
1475 // Msb = 32 - 8 - 1 = 23 and Lsb = 12
1476 const unsigned Msb = XLen - Leading - 1;
1477 const unsigned Lsb = C2;
1478 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1479 return;
1480
1481 // (srli (slli c2+c3), c3)
1482 if (OneUseOrZExtW && !IsCANDI) {
1483 SDNode *SLLI = CurDAG->getMachineNode(
1484 RISCV::SLLI, DL, VT, X,
1485 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1486 SDNode *SRLI = CurDAG->getMachineNode(
1487 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1488 CurDAG->getTargetConstant(Leading, DL, VT));
1489 ReplaceNode(Node, SRLI);
1490 return;
1491 }
1492 }
1493 }
1494
1495 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1496 // shifted mask with c2 leading zeros and c3 trailing zeros.
1497 if (!LeftShift && isShiftedMask_64(C1)) {
1498 unsigned Leading = XLen - llvm::bit_width(C1);
1499 unsigned Trailing = llvm::countr_zero(C1);
1500 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1501 !IsCANDI) {
1502 unsigned SrliOpc = RISCV::SRLI;
1503 // If the input is zexti32 we should use SRLIW.
1504 if (X.getOpcode() == ISD::AND &&
1505 isa<ConstantSDNode>(X.getOperand(1)) &&
1506 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1507 SrliOpc = RISCV::SRLIW;
1508 X = X.getOperand(0);
1509 }
1510 SDNode *SRLI = CurDAG->getMachineNode(
1511 SrliOpc, DL, VT, X,
1512 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1513 SDNode *SLLI = CurDAG->getMachineNode(
1514 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1515 CurDAG->getTargetConstant(Trailing, DL, VT));
1516 ReplaceNode(Node, SLLI);
1517 return;
1518 }
1519 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1520 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1521 OneUseOrZExtW && !IsCANDI) {
1522 SDNode *SRLIW = CurDAG->getMachineNode(
1523 RISCV::SRLIW, DL, VT, X,
1524 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1525 SDNode *SLLI = CurDAG->getMachineNode(
1526 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1527 CurDAG->getTargetConstant(Trailing, DL, VT));
1528 ReplaceNode(Node, SLLI);
1529 return;
1530 }
1531 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1532 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1533 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1534 SDNode *SRLI = CurDAG->getMachineNode(
1535 RISCV::SRLI, DL, VT, X,
1536 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1537 SDNode *SLLI_UW = CurDAG->getMachineNode(
1538 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1539 CurDAG->getTargetConstant(Trailing, DL, VT));
1540 ReplaceNode(Node, SLLI_UW);
1541 return;
1542 }
1543 }
1544
1545 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1546 // shifted mask with no leading zeros and c3 trailing zeros.
1547 if (LeftShift && isShiftedMask_64(C1)) {
1548 unsigned Leading = XLen - llvm::bit_width(C1);
1549 unsigned Trailing = llvm::countr_zero(C1);
1550 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1551 SDNode *SRLI = CurDAG->getMachineNode(
1552 RISCV::SRLI, DL, VT, X,
1553 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1554 SDNode *SLLI = CurDAG->getMachineNode(
1555 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1556 CurDAG->getTargetConstant(Trailing, DL, VT));
1557 ReplaceNode(Node, SLLI);
1558 return;
1559 }
1560 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1561 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1562 SDNode *SRLIW = CurDAG->getMachineNode(
1563 RISCV::SRLIW, DL, VT, X,
1564 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1565 SDNode *SLLI = CurDAG->getMachineNode(
1566 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1567 CurDAG->getTargetConstant(Trailing, DL, VT));
1568 ReplaceNode(Node, SLLI);
1569 return;
1570 }
1571
1572 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1573 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1574 Subtarget->hasStdExtZba()) {
1575 SDNode *SRLI = CurDAG->getMachineNode(
1576 RISCV::SRLI, DL, VT, X,
1577 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1578 SDNode *SLLI_UW = CurDAG->getMachineNode(
1579 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1580 CurDAG->getTargetConstant(Trailing, DL, VT));
1581 ReplaceNode(Node, SLLI_UW);
1582 return;
1583 }
1584 }
1585 }
1586
1587 const uint64_t C1 = N1C->getZExtValue();
1588
1589 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1590 N0.hasOneUse()) {
1591 unsigned C2 = N0.getConstantOperandVal(1);
1592 unsigned XLen = Subtarget->getXLen();
1593 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1594
1595 SDValue X = N0.getOperand(0);
1596
1597 // Prefer SRAIW + ANDI when possible.
1598 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1599 X.getOpcode() == ISD::SHL &&
1600 isa<ConstantSDNode>(X.getOperand(1)) &&
1601 X.getConstantOperandVal(1) == 32;
1602 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1603 // mask with c3 leading zeros and c2 is larger than c3.
1604 if (isMask_64(C1) && !Skip) {
1605 unsigned Leading = XLen - llvm::bit_width(C1);
1606 if (C2 > Leading) {
1607 SDNode *SRAI = CurDAG->getMachineNode(
1608 RISCV::SRAI, DL, VT, X,
1609 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1610 SDNode *SRLI = CurDAG->getMachineNode(
1611 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1612 CurDAG->getTargetConstant(Leading, DL, VT));
1613 ReplaceNode(Node, SRLI);
1614 return;
1615 }
1616 }
1617
1618 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1619 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1620 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1621 if (isShiftedMask_64(C1) && !Skip) {
1622 unsigned Leading = XLen - llvm::bit_width(C1);
1623 unsigned Trailing = llvm::countr_zero(C1);
1624 if (C2 > Leading && Leading > 0 && Trailing > 0) {
1625 SDNode *SRAI = CurDAG->getMachineNode(
1626 RISCV::SRAI, DL, VT, N0.getOperand(0),
1627 CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1628 SDNode *SRLI = CurDAG->getMachineNode(
1629 RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1630 CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1631 SDNode *SLLI = CurDAG->getMachineNode(
1632 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1633 CurDAG->getTargetConstant(Trailing, DL, VT));
1634 ReplaceNode(Node, SLLI);
1635 return;
1636 }
1637 }
1638 }
1639
1640 // If C1 masks off the upper bits only (but can't be formed as an
1641 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1642 // available.
1643 // Transform (and x, C1)
1644 // -> (<bfextract> x, msb, lsb)
1645 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue()) &&
1646 !(C1 == 0xffff && Subtarget->hasStdExtZbb()) &&
1647 !(C1 == 0xffffffff && Subtarget->hasStdExtZba())) {
1648 const unsigned Msb = llvm::bit_width(C1) - 1;
1649 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1650 return;
1651 }
1652
1653 if (tryShrinkShlLogicImm(Node))
1654 return;
1655
1656 break;
1657 }
1658 case ISD::MUL: {
1659 // Special case for calculating (mul (and X, C2), C1) where the full product
1660 // fits in XLen bits. We can shift X left by the number of leading zeros in
1661 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1662 // product has XLen trailing zeros, putting it in the output of MULHU. This
1663 // can avoid materializing a constant in a register for C2.
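// For illustration (example constants): on RV64, (mul (and X, 0xFFFFFF),
// 12345) has LeadingZeros = 40, so X is shifted left by 40 and the constant
// by 64 - 40 = 24, giving roughly
//   slli  t1, X, 40
//   mulhu t1, t1, t2      ; t2 = 12345 << 24, built by selectImm
// where the MULHU result equals (X & 0xFFFFFF) * 12345 and the 24-bit mask is
// never materialized.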
1664
1665 // RHS should be a constant.
1666 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1667 if (!N1C || !N1C->hasOneUse())
1668 break;
1669
1670 // LHS should be an AND with constant.
1671 SDValue N0 = Node->getOperand(0);
1672 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1673 break;
1674
1675 uint64_t C2 = N0.getConstantOperandVal(1);
1676
1677 // Constant should be a mask.
1678 if (!isMask_64(C2))
1679 break;
1680
1681 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1682 // multiple users or the constant is a simm12. This prevents inserting a
1683 // shift while still having uses of the AND/ZEXT. Shifting a simm12 will likely
1684 // make it more costly to materialize. Otherwise, using a SLLI might allow
1685 // it to be compressed.
1686 bool IsANDIOrZExt =
1687 isInt<12>(C2) ||
1688 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1689 // With XTHeadBb, we can use TH.EXTU.
1690 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1691 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1692 break;
1693 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1694 // the constant is a simm32.
1695 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1696 // With XTHeadBb, we can use TH.EXTU.
1697 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1698 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1699 break;
1700
1701 // We need to shift left the AND input and C1 by a total of XLen bits.
1702
1703 // How far left do we need to shift the AND input?
1704 unsigned XLen = Subtarget->getXLen();
1705 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1706
1707 // The constant gets shifted by the remaining amount unless that would
1708 // shift bits out.
1709 uint64_t C1 = N1C->getZExtValue();
1710 unsigned ConstantShift = XLen - LeadingZeros;
1711 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1712 break;
1713
1714 uint64_t ShiftedC1 = C1 << ConstantShift;
1715 // If this is RV32, we need to sign extend the constant.
1716 if (XLen == 32)
1717 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1718
1719 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1720 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1721 SDNode *SLLI =
1722 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1723 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1724 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1725 SDValue(SLLI, 0), SDValue(Imm, 0));
1726 ReplaceNode(Node, MULHU);
1727 return;
1728 }
1729 case ISD::LOAD: {
1730 if (tryIndexedLoad(Node))
1731 return;
1732
1733 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1734 LoadSDNode *Load = cast<LoadSDNode>(Node);
1735 // We match a post-incrementing load here.
1736 if (Load->getAddressingMode() != ISD::POST_INC)
1737 break;
1738
1739 SDValue Chain = Node->getOperand(0);
1740 SDValue Base = Node->getOperand(1);
1741 SDValue Offset = Node->getOperand(2);
1742
1743 bool Simm12 = false;
1744 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1745
1746 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1747 int ConstantVal = ConstantOffset->getSExtValue();
1748 Simm12 = isInt<12>(ConstantVal);
1749 if (Simm12)
1750 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1751 Offset.getValueType());
1752 }
1753
1754 unsigned Opcode = 0;
1755 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1756 case MVT::i8:
1757 if (Simm12 && SignExtend)
1758 Opcode = RISCV::CV_LB_ri_inc;
1759 else if (Simm12 && !SignExtend)
1760 Opcode = RISCV::CV_LBU_ri_inc;
1761 else if (!Simm12 && SignExtend)
1762 Opcode = RISCV::CV_LB_rr_inc;
1763 else
1764 Opcode = RISCV::CV_LBU_rr_inc;
1765 break;
1766 case MVT::i16:
1767 if (Simm12 && SignExtend)
1768 Opcode = RISCV::CV_LH_ri_inc;
1769 else if (Simm12 && !SignExtend)
1770 Opcode = RISCV::CV_LHU_ri_inc;
1771 else if (!Simm12 && SignExtend)
1772 Opcode = RISCV::CV_LH_rr_inc;
1773 else
1774 Opcode = RISCV::CV_LHU_rr_inc;
1775 break;
1776 case MVT::i32:
1777 if (Simm12)
1778 Opcode = RISCV::CV_LW_ri_inc;
1779 else
1780 Opcode = RISCV::CV_LW_rr_inc;
1781 break;
1782 default:
1783 break;
1784 }
1785 if (!Opcode)
1786 break;
1787
1788 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1789 Chain.getSimpleValueType(), Base,
1790 Offset, Chain));
1791 return;
1792 }
1793 break;
1794 }
1795 case RISCVISD::LD_RV32: {
1796 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
1797
1798 SDValue Base, Offset;
1799 SDValue Chain = Node->getOperand(0);
1800 SDValue Addr = Node->getOperand(1);
1801 SelectAddrRegImm(Addr, Base, Offset);
1802
1803 SDValue Ops[] = {Base, Offset, Chain};
1804 MachineSDNode *New = CurDAG->getMachineNode(
1805 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
1806 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1807 MVT::i32, SDValue(New, 0));
1808 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
1809 MVT::i32, SDValue(New, 0));
1810 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1811 ReplaceUses(SDValue(Node, 0), Lo);
1812 ReplaceUses(SDValue(Node, 1), Hi);
1813 ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
1814 CurDAG->RemoveDeadNode(Node);
1815 return;
1816 }
1817 case RISCVISD::SD_RV32: {
1818 SDValue Base, Offset;
1819 SDValue Chain = Node->getOperand(0);
1820 SDValue Addr = Node->getOperand(3);
1821 SelectAddrRegImm(Addr, Base, Offset);
1822
1823 SDValue Lo = Node->getOperand(1);
1824 SDValue Hi = Node->getOperand(2);
1825
1826 SDValue RegPair;
1827 // Peephole to use X0_Pair for storing zero.
1828 if (isNullConstant(Lo) && isNullConstant(Hi)) {
1829 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
1830 } else {
1831 SDValue Ops[] = {
1832 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
1833 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
1834 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1835
1836 RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1837 MVT::Untyped, Ops),
1838 0);
1839 }
1840
1841 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
1842 {RegPair, Base, Offset, Chain});
1843 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1844 ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
1845 CurDAG->RemoveDeadNode(Node);
1846 return;
1847 }
1848 case ISD::INTRINSIC_WO_CHAIN: {
1849 unsigned IntNo = Node->getConstantOperandVal(0);
1850 switch (IntNo) {
1851 // By default we do not custom select any intrinsic.
1852 default:
1853 break;
1854 case Intrinsic::riscv_vmsgeu:
1855 case Intrinsic::riscv_vmsge: {
1856 SDValue Src1 = Node->getOperand(1);
1857 SDValue Src2 = Node->getOperand(2);
1858 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1859 bool IsCmpConstant = false;
1860 bool IsCmpMinimum = false;
1861 // Only custom select scalar second operand.
1862 if (Src2.getValueType() != XLenVT)
1863 break;
1864 // Small constants are handled with patterns.
1865 int64_t CVal = 0;
1866 MVT Src1VT = Src1.getSimpleValueType();
1867 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1868 IsCmpConstant = true;
1869 CVal = C->getSExtValue();
1870 if (CVal >= -15 && CVal <= 16) {
1871 if (!IsUnsigned || CVal != 0)
1872 break;
1873 IsCmpMinimum = true;
1874 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1875 Src1VT.getScalarSizeInBits())
1876 .getSExtValue()) {
1877 IsCmpMinimum = true;
1878 }
1879 }
1880 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1881 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1882 default:
1883 llvm_unreachable("Unexpected LMUL!");
1884#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1885 case RISCVVType::lmulenum: \
1886 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1887 : RISCV::PseudoVMSLT_VX_##suffix; \
1888 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \
1889 : RISCV::PseudoVMSGT_VX_##suffix; \
1890 break;
1891 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1892 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1893 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1894 CASE_VMSLT_OPCODES(LMUL_1, M1)
1895 CASE_VMSLT_OPCODES(LMUL_2, M2)
1896 CASE_VMSLT_OPCODES(LMUL_4, M4)
1897 CASE_VMSLT_OPCODES(LMUL_8, M8)
1898#undef CASE_VMSLT_OPCODES
1899 }
1900 // Mask operations use the LMUL from the mask type.
1901 switch (RISCVTargetLowering::getLMUL(VT)) {
1902 default:
1903 llvm_unreachable("Unexpected LMUL!");
1904#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \
1905 case RISCVVType::lmulenum: \
1906 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1907 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \
1908 break;
1909 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1910 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1911 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1912 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1913 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1914 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1915 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1916#undef CASE_VMNAND_VMSET_OPCODES
1917 }
1918 SDValue SEW = CurDAG->getTargetConstant(
1919 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1920 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1921 SDValue VL;
1922 selectVLOp(Node->getOperand(3), VL);
1923
1924 // If vmsge(u) with minimum value, expand it to vmset.
1925 if (IsCmpMinimum) {
1926 ReplaceNode(Node,
1927 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
1928 return;
1929 }
1930
1931 if (IsCmpConstant) {
1932 SDValue Imm =
1933 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1934
1935 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
1936 {Src1, Imm, VL, SEW}));
1937 return;
1938 }
1939
1940 // Expand to
1941 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
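// (There is no vmsge{u}.vx encoding in the V extension, so the comparison
// is computed as the complement of vmslt{u}.vx.)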
1942 SDValue Cmp = SDValue(
1943 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1944 0);
1945 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1946 {Cmp, Cmp, VL, MaskSEW}));
1947 return;
1948 }
1949 case Intrinsic::riscv_vmsgeu_mask:
1950 case Intrinsic::riscv_vmsge_mask: {
1951 SDValue Src1 = Node->getOperand(2);
1952 SDValue Src2 = Node->getOperand(3);
1953 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1954 bool IsCmpConstant = false;
1955 bool IsCmpMinimum = false;
1956 // Only custom select scalar second operand.
1957 if (Src2.getValueType() != XLenVT)
1958 break;
1959 // Small constants are handled with patterns.
1960 MVT Src1VT = Src1.getSimpleValueType();
1961 int64_t CVal = 0;
1962 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1963 IsCmpConstant = true;
1964 CVal = C->getSExtValue();
1965 if (CVal >= -15 && CVal <= 16) {
1966 if (!IsUnsigned || CVal != 0)
1967 break;
1968 IsCmpMinimum = true;
1969 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1970 Src1VT.getScalarSizeInBits())
1971 .getSExtValue()) {
1972 IsCmpMinimum = true;
1973 }
1974 }
1975 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1976 VMOROpcode, VMSGTMaskOpcode;
1977 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1978 default:
1979 llvm_unreachable("Unexpected LMUL!");
1980#define CASE_VMSLT_OPCODES(lmulenum, suffix) \
1981 case RISCVVType::lmulenum: \
1982 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1983 : RISCV::PseudoVMSLT_VX_##suffix; \
1984 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1985 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1986 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \
1987 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \
1988 break;
1989 CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1990 CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1991 CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1992 CASE_VMSLT_OPCODES(LMUL_1, M1)
1993 CASE_VMSLT_OPCODES(LMUL_2, M2)
1994 CASE_VMSLT_OPCODES(LMUL_4, M4)
1995 CASE_VMSLT_OPCODES(LMUL_8, M8)
1996#undef CASE_VMSLT_OPCODES
1997 }
1998 // Mask operations use the LMUL from the mask type.
1999 switch (RISCVTargetLowering::getLMUL(VT)) {
2000 default:
2001 llvm_unreachable("Unexpected LMUL!");
2002#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
2003 case RISCVVType::lmulenum: \
2004 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
2005 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
2006 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
2007 break;
2008 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2009 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2010 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2011 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
2012 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
2013 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
2014 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
2015#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2016 }
2017 SDValue SEW = CurDAG->getTargetConstant(
2018 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2019 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2020 SDValue VL;
2021 selectVLOp(Node->getOperand(5), VL);
2022 SDValue MaskedOff = Node->getOperand(1);
2023 SDValue Mask = Node->getOperand(4);
2024
2025 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2026 if (IsCmpMinimum) {
2027 // We don't need vmor if the MaskedOff and the Mask are the same
2028 // value.
2029 if (Mask == MaskedOff) {
2030 ReplaceUses(Node, Mask.getNode());
2031 return;
2032 }
2033 ReplaceNode(Node,
2034 CurDAG->getMachineNode(VMOROpcode, DL, VT,
2035 {Mask, MaskedOff, VL, MaskSEW}));
2036 return;
2037 }
2038
2039 // If the MaskedOff value and the Mask are the same value use
2040 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
2041 // This avoids needing to copy v0 to vd before starting the next sequence.
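// (vmandn.mm vd, mask, vt computes mask & ~vt: active lanes get the negated
// compare, inactive lanes become 0, which matches the masked-off value since
// MaskedOff == Mask here.)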
2042 if (Mask == MaskedOff) {
2043 SDValue Cmp = SDValue(
2044 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2045 0);
2046 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2047 {Mask, Cmp, VL, MaskSEW}));
2048 return;
2049 }
2050
2051 SDValue PolicyOp =
2052 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2053
2054 if (IsCmpConstant) {
2055 SDValue Imm =
2056 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2057
2058 ReplaceNode(Node, CurDAG->getMachineNode(
2059 VMSGTMaskOpcode, DL, VT,
2060 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2061 return;
2062 }
2063
2064 // Otherwise use
2065 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2066 // The result is mask undisturbed.
2067 // We use the same instructions to emulate mask agnostic behavior, because
2068 // the agnostic result can be either undisturbed or all 1.
2069 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2070 {MaskedOff, Src1, Src2, Mask,
2071 VL, SEW, PolicyOp}),
2072 0);
2073 // vmxor.mm vd, vd, v0 is used to update the active lanes.
2074 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2075 {Cmp, Mask, VL, MaskSEW}));
2076 return;
2077 }
2078 case Intrinsic::riscv_vsetvli:
2079 case Intrinsic::riscv_vsetvlimax:
2080 return selectVSETVLI(Node);
2081 case Intrinsic::riscv_sf_vsettnt:
2082 case Intrinsic::riscv_sf_vsettm:
2083 case Intrinsic::riscv_sf_vsettk:
2084 return selectXSfmmVSET(Node);
2085 }
2086 break;
2087 }
2088 case ISD::INTRINSIC_W_CHAIN: {
2089 unsigned IntNo = Node->getConstantOperandVal(1);
2090 switch (IntNo) {
2091 // By default we do not custom select any intrinsic.
2092 default:
2093 break;
2094 case Intrinsic::riscv_vlseg2:
2095 case Intrinsic::riscv_vlseg3:
2096 case Intrinsic::riscv_vlseg4:
2097 case Intrinsic::riscv_vlseg5:
2098 case Intrinsic::riscv_vlseg6:
2099 case Intrinsic::riscv_vlseg7:
2100 case Intrinsic::riscv_vlseg8: {
2101 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2102 /*IsStrided*/ false);
2103 return;
2104 }
2105 case Intrinsic::riscv_vlseg2_mask:
2106 case Intrinsic::riscv_vlseg3_mask:
2107 case Intrinsic::riscv_vlseg4_mask:
2108 case Intrinsic::riscv_vlseg5_mask:
2109 case Intrinsic::riscv_vlseg6_mask:
2110 case Intrinsic::riscv_vlseg7_mask:
2111 case Intrinsic::riscv_vlseg8_mask: {
2112 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2113 /*IsStrided*/ false);
2114 return;
2115 }
2116 case Intrinsic::riscv_vlsseg2:
2117 case Intrinsic::riscv_vlsseg3:
2118 case Intrinsic::riscv_vlsseg4:
2119 case Intrinsic::riscv_vlsseg5:
2120 case Intrinsic::riscv_vlsseg6:
2121 case Intrinsic::riscv_vlsseg7:
2122 case Intrinsic::riscv_vlsseg8: {
2123 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2124 /*IsStrided*/ true);
2125 return;
2126 }
2127 case Intrinsic::riscv_vlsseg2_mask:
2128 case Intrinsic::riscv_vlsseg3_mask:
2129 case Intrinsic::riscv_vlsseg4_mask:
2130 case Intrinsic::riscv_vlsseg5_mask:
2131 case Intrinsic::riscv_vlsseg6_mask:
2132 case Intrinsic::riscv_vlsseg7_mask:
2133 case Intrinsic::riscv_vlsseg8_mask: {
2134 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2135 /*IsStrided*/ true);
2136 return;
2137 }
2138 case Intrinsic::riscv_vloxseg2:
2139 case Intrinsic::riscv_vloxseg3:
2140 case Intrinsic::riscv_vloxseg4:
2141 case Intrinsic::riscv_vloxseg5:
2142 case Intrinsic::riscv_vloxseg6:
2143 case Intrinsic::riscv_vloxseg7:
2144 case Intrinsic::riscv_vloxseg8:
2145 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2146 /*IsOrdered*/ true);
2147 return;
2148 case Intrinsic::riscv_vluxseg2:
2149 case Intrinsic::riscv_vluxseg3:
2150 case Intrinsic::riscv_vluxseg4:
2151 case Intrinsic::riscv_vluxseg5:
2152 case Intrinsic::riscv_vluxseg6:
2153 case Intrinsic::riscv_vluxseg7:
2154 case Intrinsic::riscv_vluxseg8:
2155 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2156 /*IsOrdered*/ false);
2157 return;
2158 case Intrinsic::riscv_vloxseg2_mask:
2159 case Intrinsic::riscv_vloxseg3_mask:
2160 case Intrinsic::riscv_vloxseg4_mask:
2161 case Intrinsic::riscv_vloxseg5_mask:
2162 case Intrinsic::riscv_vloxseg6_mask:
2163 case Intrinsic::riscv_vloxseg7_mask:
2164 case Intrinsic::riscv_vloxseg8_mask:
2165 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2166 /*IsOrdered*/ true);
2167 return;
2168 case Intrinsic::riscv_vluxseg2_mask:
2169 case Intrinsic::riscv_vluxseg3_mask:
2170 case Intrinsic::riscv_vluxseg4_mask:
2171 case Intrinsic::riscv_vluxseg5_mask:
2172 case Intrinsic::riscv_vluxseg6_mask:
2173 case Intrinsic::riscv_vluxseg7_mask:
2174 case Intrinsic::riscv_vluxseg8_mask:
2175 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2176 /*IsOrdered*/ false);
2177 return;
2178 case Intrinsic::riscv_vlseg8ff:
2179 case Intrinsic::riscv_vlseg7ff:
2180 case Intrinsic::riscv_vlseg6ff:
2181 case Intrinsic::riscv_vlseg5ff:
2182 case Intrinsic::riscv_vlseg4ff:
2183 case Intrinsic::riscv_vlseg3ff:
2184 case Intrinsic::riscv_vlseg2ff: {
2185 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2186 return;
2187 }
2188 case Intrinsic::riscv_vlseg8ff_mask:
2189 case Intrinsic::riscv_vlseg7ff_mask:
2190 case Intrinsic::riscv_vlseg6ff_mask:
2191 case Intrinsic::riscv_vlseg5ff_mask:
2192 case Intrinsic::riscv_vlseg4ff_mask:
2193 case Intrinsic::riscv_vlseg3ff_mask:
2194 case Intrinsic::riscv_vlseg2ff_mask: {
2195 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2196 return;
2197 }
2198 case Intrinsic::riscv_vloxei:
2199 case Intrinsic::riscv_vloxei_mask:
2200 case Intrinsic::riscv_vluxei:
2201 case Intrinsic::riscv_vluxei_mask: {
2202 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2203 IntNo == Intrinsic::riscv_vluxei_mask;
2204 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2205 IntNo == Intrinsic::riscv_vloxei_mask;
2206
2207 MVT VT = Node->getSimpleValueType(0);
2208 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2209
2210 unsigned CurOp = 2;
2211 SmallVector<SDValue, 8> Operands;
2212 Operands.push_back(Node->getOperand(CurOp++));
2213
2214 MVT IndexVT;
2215 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2216 /*IsStridedOrIndexed*/ true, Operands,
2217 /*IsLoad=*/true, &IndexVT);
2218
2220 "Element count mismatch");
2221
2222 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2223 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2224 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2225 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2226 report_fatal_error("The V extension does not support EEW=64 for index "
2227 "values when XLEN=32");
2228 }
2229 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2230 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2231 static_cast<unsigned>(IndexLMUL));
2232 MachineSDNode *Load =
2233 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2234
2235 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2236
2237 ReplaceNode(Node, Load);
2238 return;
2239 }
2240 case Intrinsic::riscv_vlm:
2241 case Intrinsic::riscv_vle:
2242 case Intrinsic::riscv_vle_mask:
2243 case Intrinsic::riscv_vlse:
2244 case Intrinsic::riscv_vlse_mask: {
2245 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2246 IntNo == Intrinsic::riscv_vlse_mask;
2247 bool IsStrided =
2248 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2249
2250 MVT VT = Node->getSimpleValueType(0);
2251 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2252
2253 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2254 // operand at the IR level. In pseudos, it has both policy and
2255 // passthru operands. The passthru operand is needed to track the
2256 // "tail undefined" state, and the policy is there just for
2257 // consistency - it will always be "don't care" for the
2258 // unmasked form.
2259 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2260 unsigned CurOp = 2;
2261 SmallVector<SDValue, 8> Operands;
2262 if (HasPassthruOperand)
2263 Operands.push_back(Node->getOperand(CurOp++));
2264 else {
2265 // We eagerly lower to implicit_def (instead of undef), as we
2266 // otherwise fail to select nodes such as: nxv1i1 = undef
2267 SDNode *Passthru =
2268 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2269 Operands.push_back(SDValue(Passthru, 0));
2270 }
2271 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2272 Operands, /*IsLoad=*/true);
2273
2274 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2275 const RISCV::VLEPseudo *P =
2276 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2277 static_cast<unsigned>(LMUL));
2278 MachineSDNode *Load =
2279 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2280
2281 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2282
2283 ReplaceNode(Node, Load);
2284 return;
2285 }
2286 case Intrinsic::riscv_vleff:
2287 case Intrinsic::riscv_vleff_mask: {
2288 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2289
2290 MVT VT = Node->getSimpleValueType(0);
2291 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2292
2293 unsigned CurOp = 2;
2294 SmallVector<SDValue, 7> Operands;
2295 Operands.push_back(Node->getOperand(CurOp++));
2296 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2297 /*IsStridedOrIndexed*/ false, Operands,
2298 /*IsLoad=*/true);
2299
2300 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2301 const RISCV::VLEPseudo *P =
2302 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2303 Log2SEW, static_cast<unsigned>(LMUL));
2304 MachineSDNode *Load = CurDAG->getMachineNode(
2305 P->Pseudo, DL, Node->getVTList(), Operands);
2306 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2307
2308 ReplaceNode(Node, Load);
2309 return;
2310 }
2311 case Intrinsic::riscv_nds_vln:
2312 case Intrinsic::riscv_nds_vln_mask:
2313 case Intrinsic::riscv_nds_vlnu:
2314 case Intrinsic::riscv_nds_vlnu_mask: {
2315 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2316 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2317 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2318 IntNo == Intrinsic::riscv_nds_vlnu_mask;
2319
2320 MVT VT = Node->getSimpleValueType(0);
2321 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2322 unsigned CurOp = 2;
2323 SmallVector<SDValue, 8> Operands;
2324
2325 Operands.push_back(Node->getOperand(CurOp++));
2326 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2327 /*IsStridedOrIndexed=*/false, Operands,
2328 /*IsLoad=*/true);
2329
2330 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2331 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2332 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2333 MachineSDNode *Load =
2334 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2335
2336 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2337 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2338
2339 ReplaceNode(Node, Load);
2340 return;
2341 }
2342 }
2343 break;
2344 }
2345 case ISD::INTRINSIC_VOID: {
2346 unsigned IntNo = Node->getConstantOperandVal(1);
2347 switch (IntNo) {
2348 case Intrinsic::riscv_vsseg2:
2349 case Intrinsic::riscv_vsseg3:
2350 case Intrinsic::riscv_vsseg4:
2351 case Intrinsic::riscv_vsseg5:
2352 case Intrinsic::riscv_vsseg6:
2353 case Intrinsic::riscv_vsseg7:
2354 case Intrinsic::riscv_vsseg8: {
2355 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2356 /*IsStrided*/ false);
2357 return;
2358 }
2359 case Intrinsic::riscv_vsseg2_mask:
2360 case Intrinsic::riscv_vsseg3_mask:
2361 case Intrinsic::riscv_vsseg4_mask:
2362 case Intrinsic::riscv_vsseg5_mask:
2363 case Intrinsic::riscv_vsseg6_mask:
2364 case Intrinsic::riscv_vsseg7_mask:
2365 case Intrinsic::riscv_vsseg8_mask: {
2366 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2367 /*IsStrided*/ false);
2368 return;
2369 }
2370 case Intrinsic::riscv_vssseg2:
2371 case Intrinsic::riscv_vssseg3:
2372 case Intrinsic::riscv_vssseg4:
2373 case Intrinsic::riscv_vssseg5:
2374 case Intrinsic::riscv_vssseg6:
2375 case Intrinsic::riscv_vssseg7:
2376 case Intrinsic::riscv_vssseg8: {
2377 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2378 /*IsStrided*/ true);
2379 return;
2380 }
2381 case Intrinsic::riscv_vssseg2_mask:
2382 case Intrinsic::riscv_vssseg3_mask:
2383 case Intrinsic::riscv_vssseg4_mask:
2384 case Intrinsic::riscv_vssseg5_mask:
2385 case Intrinsic::riscv_vssseg6_mask:
2386 case Intrinsic::riscv_vssseg7_mask:
2387 case Intrinsic::riscv_vssseg8_mask: {
2388 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2389 /*IsStrided*/ true);
2390 return;
2391 }
2392 case Intrinsic::riscv_vsoxseg2:
2393 case Intrinsic::riscv_vsoxseg3:
2394 case Intrinsic::riscv_vsoxseg4:
2395 case Intrinsic::riscv_vsoxseg5:
2396 case Intrinsic::riscv_vsoxseg6:
2397 case Intrinsic::riscv_vsoxseg7:
2398 case Intrinsic::riscv_vsoxseg8:
2399 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2400 /*IsOrdered*/ true);
2401 return;
2402 case Intrinsic::riscv_vsuxseg2:
2403 case Intrinsic::riscv_vsuxseg3:
2404 case Intrinsic::riscv_vsuxseg4:
2405 case Intrinsic::riscv_vsuxseg5:
2406 case Intrinsic::riscv_vsuxseg6:
2407 case Intrinsic::riscv_vsuxseg7:
2408 case Intrinsic::riscv_vsuxseg8:
2409 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2410 /*IsOrdered*/ false);
2411 return;
2412 case Intrinsic::riscv_vsoxseg2_mask:
2413 case Intrinsic::riscv_vsoxseg3_mask:
2414 case Intrinsic::riscv_vsoxseg4_mask:
2415 case Intrinsic::riscv_vsoxseg5_mask:
2416 case Intrinsic::riscv_vsoxseg6_mask:
2417 case Intrinsic::riscv_vsoxseg7_mask:
2418 case Intrinsic::riscv_vsoxseg8_mask:
2419 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2420 /*IsOrdered*/ true);
2421 return;
2422 case Intrinsic::riscv_vsuxseg2_mask:
2423 case Intrinsic::riscv_vsuxseg3_mask:
2424 case Intrinsic::riscv_vsuxseg4_mask:
2425 case Intrinsic::riscv_vsuxseg5_mask:
2426 case Intrinsic::riscv_vsuxseg6_mask:
2427 case Intrinsic::riscv_vsuxseg7_mask:
2428 case Intrinsic::riscv_vsuxseg8_mask:
2429 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2430 /*IsOrdered*/ false);
2431 return;
2432 case Intrinsic::riscv_vsoxei:
2433 case Intrinsic::riscv_vsoxei_mask:
2434 case Intrinsic::riscv_vsuxei:
2435 case Intrinsic::riscv_vsuxei_mask: {
2436 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2437 IntNo == Intrinsic::riscv_vsuxei_mask;
2438 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2439 IntNo == Intrinsic::riscv_vsoxei_mask;
2440
2441 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2442 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2443
2444 unsigned CurOp = 2;
2445 SmallVector<SDValue, 8> Operands;
2446 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2447
2448 MVT IndexVT;
2449 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2450 /*IsStridedOrIndexed*/ true, Operands,
2451 /*IsLoad=*/false, &IndexVT);
2452
2454 "Element count mismatch");
2455
2456 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2457 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2458 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2459 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2460 report_fatal_error("The V extension does not support EEW=64 for index "
2461 "values when XLEN=32");
2462 }
2463 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2464 IsMasked, IsOrdered, IndexLog2EEW,
2465 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2466 MachineSDNode *Store =
2467 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2468
2469 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2470
2471 ReplaceNode(Node, Store);
2472 return;
2473 }
2474 case Intrinsic::riscv_vsm:
2475 case Intrinsic::riscv_vse:
2476 case Intrinsic::riscv_vse_mask:
2477 case Intrinsic::riscv_vsse:
2478 case Intrinsic::riscv_vsse_mask: {
2479 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2480 IntNo == Intrinsic::riscv_vsse_mask;
2481 bool IsStrided =
2482 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2483
2484 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2485 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2486
2487 unsigned CurOp = 2;
2488 SmallVector<SDValue, 8> Operands;
2489 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2490
2491 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2492 Operands);
2493
2494 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2495 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2496 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2497 MachineSDNode *Store =
2498 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2499 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2500
2501 ReplaceNode(Node, Store);
2502 return;
2503 }
2504 case Intrinsic::riscv_sf_vc_x_se:
2505 case Intrinsic::riscv_sf_vc_i_se:
2506 selectSF_VC_X_SE(Node);
2507 return;
2508 case Intrinsic::riscv_sf_vlte8:
2509 case Intrinsic::riscv_sf_vlte16:
2510 case Intrinsic::riscv_sf_vlte32:
2511 case Intrinsic::riscv_sf_vlte64: {
2512 unsigned Log2SEW;
2513 unsigned PseudoInst;
2514 switch (IntNo) {
2515 case Intrinsic::riscv_sf_vlte8:
2516 PseudoInst = RISCV::PseudoSF_VLTE8;
2517 Log2SEW = 3;
2518 break;
2519 case Intrinsic::riscv_sf_vlte16:
2520 PseudoInst = RISCV::PseudoSF_VLTE16;
2521 Log2SEW = 4;
2522 break;
2523 case Intrinsic::riscv_sf_vlte32:
2524 PseudoInst = RISCV::PseudoSF_VLTE32;
2525 Log2SEW = 5;
2526 break;
2527 case Intrinsic::riscv_sf_vlte64:
2528 PseudoInst = RISCV::PseudoSF_VLTE64;
2529 Log2SEW = 6;
2530 break;
2531 }
2532
2533 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2534 SDValue TWidenOp = CurDAG->getTargetConstant(1, DL, XLenVT);
2535 SDValue Operands[] = {Node->getOperand(2),
2536 Node->getOperand(3),
2537 Node->getOperand(4),
2538 SEWOp,
2539 TWidenOp,
2540 Node->getOperand(0)};
2541
2542 MachineSDNode *TileLoad =
2543 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2544 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2545 CurDAG->setNodeMemRefs(TileLoad, {MemOp->getMemOperand()});
2546
2547 ReplaceNode(Node, TileLoad);
2548 return;
2549 }
2550 case Intrinsic::riscv_sf_mm_s_s:
2551 case Intrinsic::riscv_sf_mm_s_u:
2552 case Intrinsic::riscv_sf_mm_u_s:
2553 case Intrinsic::riscv_sf_mm_u_u:
2554 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2555 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2556 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2557 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2558 case Intrinsic::riscv_sf_mm_f_f: {
2559 bool HasFRM = false;
2560 unsigned PseudoInst;
2561 switch (IntNo) {
2562 case Intrinsic::riscv_sf_mm_s_s:
2563 PseudoInst = RISCV::PseudoSF_MM_S_S;
2564 break;
2565 case Intrinsic::riscv_sf_mm_s_u:
2566 PseudoInst = RISCV::PseudoSF_MM_S_U;
2567 break;
2568 case Intrinsic::riscv_sf_mm_u_s:
2569 PseudoInst = RISCV::PseudoSF_MM_U_S;
2570 break;
2571 case Intrinsic::riscv_sf_mm_u_u:
2572 PseudoInst = RISCV::PseudoSF_MM_U_U;
2573 break;
2574 case Intrinsic::riscv_sf_mm_e5m2_e5m2:
2575 PseudoInst = RISCV::PseudoSF_MM_E5M2_E5M2;
2576 HasFRM = true;
2577 break;
2578 case Intrinsic::riscv_sf_mm_e5m2_e4m3:
2579 PseudoInst = RISCV::PseudoSF_MM_E5M2_E4M3;
2580 HasFRM = true;
2581 break;
2582 case Intrinsic::riscv_sf_mm_e4m3_e5m2:
2583 PseudoInst = RISCV::PseudoSF_MM_E4M3_E5M2;
2584 HasFRM = true;
2585 break;
2586 case Intrinsic::riscv_sf_mm_e4m3_e4m3:
2587 PseudoInst = RISCV::PseudoSF_MM_E4M3_E4M3;
2588 HasFRM = true;
2589 break;
2590 case Intrinsic::riscv_sf_mm_f_f:
2591 if (Node->getOperand(3).getValueType().getScalarType() == MVT::bf16)
2592 PseudoInst = RISCV::PseudoSF_MM_F_F_ALT;
2593 else
2594 PseudoInst = RISCV::PseudoSF_MM_F_F;
2595 HasFRM = true;
2596 break;
2597 }
2598 uint64_t TileNum = Node->getConstantOperandVal(2);
2599 SDValue Op1 = Node->getOperand(3);
2600 SDValue Op2 = Node->getOperand(4);
2601 MVT VT = Op1->getSimpleValueType(0);
2602 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2603 SDValue TmOp = Node->getOperand(5);
2604 SDValue TnOp = Node->getOperand(6);
2605 SDValue TkOp = Node->getOperand(7);
2606 SDValue TWidenOp = Node->getOperand(8);
2607 SDValue Chain = Node->getOperand(0);
2608
2609 // sf.mm.f.f with sew=32, twiden=2 is invalid
2610 if (IntNo == Intrinsic::riscv_sf_mm_f_f && Log2SEW == 5 &&
2611 TWidenOp->getAsZExtVal() == 2)
2612 reportFatalUsageError("sf.mm.f.f doesn't support (sew=32, twiden=2)");
2613
2614 SmallVector<SDValue, 10> Operands(
2615 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Op1, Op2});
2616 if (HasFRM)
2617 Operands.push_back(
2618 CurDAG->getTargetConstant(RISCVFPRndMode::DYN, DL, XLenVT));
2619 Operands.append({TmOp, TnOp, TkOp,
2620 CurDAG->getTargetConstant(Log2SEW, DL, XLenVT), TWidenOp,
2621 Chain});
2622
2623 auto *NewNode =
2624 CurDAG->getMachineNode(PseudoInst, DL, Node->getVTList(), Operands);
2625
2626 ReplaceNode(Node, NewNode);
2627 return;
2628 }
2629 case Intrinsic::riscv_sf_vtzero_t: {
2630 uint64_t TileNum = Node->getConstantOperandVal(2);
2631 SDValue Tm = Node->getOperand(3);
2632 SDValue Tn = Node->getOperand(4);
2633 SDValue Log2SEW = Node->getOperand(5);
2634 SDValue TWiden = Node->getOperand(6);
2635 SDValue Chain = Node->getOperand(0);
2636 auto *NewNode = CurDAG->getMachineNode(
2637 RISCV::PseudoSF_VTZERO_T, DL, Node->getVTList(),
2638 {CurDAG->getRegister(getTileReg(TileNum), XLenVT), Tm, Tn, Log2SEW,
2639 TWiden, Chain});
2640
2641 ReplaceNode(Node, NewNode);
2642 return;
2643 }
2644 }
2645 break;
2646 }
2647 case ISD::BITCAST: {
2648 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2649 // Just drop bitcasts between vectors if both are fixed or both are
2650 // scalable.
2651 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2652 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2653 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2654 CurDAG->RemoveDeadNode(Node);
2655 return;
2656 }
2657 break;
2658 }
2659 case ISD::INSERT_SUBVECTOR:
2660 case RISCVISD::TUPLE_INSERT: {
2661 SDValue V = Node->getOperand(0);
2662 SDValue SubV = Node->getOperand(1);
2663 SDLoc DL(SubV);
2664 auto Idx = Node->getConstantOperandVal(2);
2665 MVT SubVecVT = SubV.getSimpleValueType();
2666
2667 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2668 MVT SubVecContainerVT = SubVecVT;
2669 // Establish the correct scalable-vector types for any fixed-length type.
2670 if (SubVecVT.isFixedLengthVector()) {
2671 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2672 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2673 [[maybe_unused]] bool ExactlyVecRegSized =
2674 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2675 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2676 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2677 .getKnownMinValue()));
2678 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2679 }
2680 MVT ContainerVT = VT;
2681 if (VT.isFixedLengthVector())
2682 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2683
2684 const auto *TRI = Subtarget->getRegisterInfo();
2685 unsigned SubRegIdx;
2686 std::tie(SubRegIdx, Idx) =
2687 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2688 ContainerVT, SubVecContainerVT, Idx, TRI);
2689
2690 // If the Idx hasn't been completely eliminated then this is a subvector
2691 // insert which doesn't naturally align to a vector register. These must
2692 // be handled using instructions to manipulate the vector registers.
2693 if (Idx != 0)
2694 break;
2695
2696 RISCVVType::VLMUL SubVecLMUL =
2697 RISCVTargetLowering::getLMUL(SubVecContainerVT);
2698 [[maybe_unused]] bool IsSubVecPartReg =
2699 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
2700 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
2701 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
2702 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2703 V.isUndef()) &&
2704 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2705 "the subvector is smaller than a full-sized register");
2706
2707 // If we haven't set a SubRegIdx, then we must be going between
2708 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2709 if (SubRegIdx == RISCV::NoSubRegister) {
2710 unsigned InRegClassID =
2711 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2712 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2713 InRegClassID &&
2714 "Unexpected subvector extraction");
2715 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2716 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2717 DL, VT, SubV, RC);
2718 ReplaceNode(Node, NewNode);
2719 return;
2720 }
2721
2722 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2723 ReplaceNode(Node, Insert.getNode());
2724 return;
2725 }
2726 case ISD::EXTRACT_SUBVECTOR:
2727 case RISCVISD::TUPLE_EXTRACT: {
2728 SDValue V = Node->getOperand(0);
2729 auto Idx = Node->getConstantOperandVal(1);
2730 MVT InVT = V.getSimpleValueType();
2731 SDLoc DL(V);
2732
2733 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2734 MVT SubVecContainerVT = VT;
2735 // Establish the correct scalable-vector types for any fixed-length type.
2736 if (VT.isFixedLengthVector()) {
2737 assert(Idx == 0);
2738 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2739 }
2740 if (InVT.isFixedLengthVector())
2741 InVT = TLI.getContainerForFixedLengthVector(InVT);
2742
2743 const auto *TRI = Subtarget->getRegisterInfo();
2744 unsigned SubRegIdx;
2745 std::tie(SubRegIdx, Idx) =
2746 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2747 InVT, SubVecContainerVT, Idx, TRI);
2748
2749 // If the Idx hasn't been completely eliminated then this is a subvector
2750 // extract which doesn't naturally align to a vector register. These must
2751 // be handled using instructions to manipulate the vector registers.
2752 if (Idx != 0)
2753 break;
2754
2755 // If we haven't set a SubRegIdx, then we must be going between
2756 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2757 if (SubRegIdx == RISCV::NoSubRegister) {
2758 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2759 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2760 InRegClassID &&
2761 "Unexpected subvector extraction");
2762 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2763 SDNode *NewNode =
2764 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2765 ReplaceNode(Node, NewNode);
2766 return;
2767 }
2768
2769 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2770 ReplaceNode(Node, Extract.getNode());
2771 return;
2772 }
2773 case RISCVISD::VMV_S_X_VL:
2774 case RISCVISD::VFMV_S_F_VL:
2775 case RISCVISD::VMV_V_X_VL:
2776 case RISCVISD::VFMV_V_F_VL: {
2777 // Try to match splat of a scalar load to a strided load with stride of x0.
2778 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2779 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2780 if (!Node->getOperand(0).isUndef())
2781 break;
2782 SDValue Src = Node->getOperand(1);
2783 auto *Ld = dyn_cast<LoadSDNode>(Src);
2784 // Can't fold an indexed (load update) node because its second
2785 // output is also used, so the load update node can't be removed.
2786 if (!Ld || Ld->isIndexed())
2787 break;
2788 EVT MemVT = Ld->getMemoryVT();
2789 // The memory VT should be the same size as the element type.
2790 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2791 break;
2792 if (!IsProfitableToFold(Src, Node, Node) ||
2793 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2794 break;
2795
2796 SDValue VL;
2797 if (IsScalarMove) {
2798 // We could handle more VL values if we updated the VSETVLI insertion
2799 // pass to avoid introducing extra VSETVLIs.
2800 if (!isOneConstant(Node->getOperand(2)))
2801 break;
2802 selectVLOp(Node->getOperand(2), VL);
2803 } else
2804 selectVLOp(Node->getOperand(2), VL);
2805
2806 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2807 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2808
2809 // If VL=1, then we don't need to do a strided load and can just do a
2810 // regular load.
2811 bool IsStrided = !isOneConstant(VL);
2812
2813 // Only do a strided load if the subtarget has optimized zero-stride vector loads.
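// (A zero-stride vlse reads the same address for every active element,
// i.e. it broadcasts the scalar; the flag indicates the core does this
// efficiently.)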
2814 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2815 break;
2816
2817 SmallVector<SDValue> Operands = {
2818 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2819 Ld->getBasePtr()};
2820 if (IsStrided)
2821 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2822 uint64_t Policy = RISCVVType::MASK_AGNOSTIC | RISCVVType::TAIL_AGNOSTIC;
2823 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2824 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2825
2826 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2827 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2828 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2829 Log2SEW, static_cast<unsigned>(LMUL));
2830 MachineSDNode *Load =
2831 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2832 // Update the chain.
2833 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2834 // Record the mem-refs
2835 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2836 // Replace the splat with the vlse.
2837 ReplaceNode(Node, Load);
2838 return;
2839 }
2840 case ISD::PREFETCH:
2841 unsigned Locality = Node->getConstantOperandVal(3);
2842 if (Locality > 2)
2843 break;
2844
2845 auto *LoadStoreMem = cast<MemSDNode>(Node);
2846 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2847 MMO->setFlags(MachineMemOperand::MONonTemporal);
2848
2849 int NontemporalLevel = 0;
2850 switch (Locality) {
2851 case 0:
2852 NontemporalLevel = 3; // NTL.ALL
2853 break;
2854 case 1:
2855 NontemporalLevel = 1; // NTL.PALL
2856 break;
2857 case 2:
2858 NontemporalLevel = 0; // NTL.P1
2859 break;
2860 default:
2861 llvm_unreachable("unexpected locality value.");
2862 }
2863
2864 if (NontemporalLevel & 0b1)
2865 MMO->setFlags(MONontemporalBit0);
2866 if (NontemporalLevel & 0b10)
2867 MMO->setFlags(MONontemporalBit1);
2868 break;
2869 }
2870
2871 // Select the default instruction.
2872 SelectCode(Node);
2873}
2874
2875 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2876 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2877 std::vector<SDValue> &OutOps) {
2878 // Always produce a register and immediate operand, as expected by
2879 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2880 switch (ConstraintID) {
2881 case InlineAsm::ConstraintCode::o:
2882 case InlineAsm::ConstraintCode::m: {
2883 SDValue Op0, Op1;
2884 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2885 assert(Found && "SelectAddrRegImm should always succeed");
2886 OutOps.push_back(Op0);
2887 OutOps.push_back(Op1);
2888 return false;
2889 }
2890 case InlineAsm::ConstraintCode::A:
2891 OutOps.push_back(Op);
2892 OutOps.push_back(
2893 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2894 return false;
2895 default:
2896 report_fatal_error("Unexpected asm memory constraint " +
2897 InlineAsm::getMemConstraintName(ConstraintID));
2898 }
2899
2900 return true;
2901}
2902
2903 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2904 SDValue &Offset) {
2905 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2906 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2907 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2908 return true;
2909 }
2910
2911 return false;
2912}
2913
2914// Fold constant addresses.
2915static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2916 const MVT VT, const RISCVSubtarget *Subtarget,
2917 SDValue Addr, SDValue &Base, SDValue &Offset,
2918 bool IsPrefetch = false) {
2919 if (!isa<ConstantSDNode>(Addr))
2920 return false;
2921
2922 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2923
2924 // If the constant is a simm12, we can fold the whole constant and use X0 as
2925 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2926 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
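// For example, CVal = 0x12345678 splits into Lo12 = 0x678 and Hi20 = 0x12345,
// i.e. LUI a0, 0x12345 with a 0x678 offset folded into the memory access;
// CVal = 0x12345fff splits into Lo12 = -1 and Hi20 = 0x12346.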
2927 int64_t Lo12 = SignExtend64<12>(CVal);
2928 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2929 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2930 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2931 return false;
2932 if (Hi) {
2933 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2934 Base = SDValue(
2935 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2936 CurDAG->getTargetConstant(Hi20, DL, VT)),
2937 0);
2938 } else {
2939 Base = CurDAG->getRegister(RISCV::X0, VT);
2940 }
2941 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2942 return true;
2943 }
2944
2945 // Ask how constant materialization would handle this constant.
2946 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2947
2948 // If the last instruction would be an ADDI, we can fold its immediate and
2949 // emit the rest of the sequence as the base.
2950 if (Seq.back().getOpcode() != RISCV::ADDI)
2951 return false;
2952 Lo12 = Seq.back().getImm();
2953 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2954 return false;
2955
2956 // Drop the last instruction.
2957 Seq.pop_back();
2958 assert(!Seq.empty() && "Expected more instructions in sequence");
2959
2960 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2961 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
2962 return true;
2963}
2964
2965// Is this ADD instruction only used as the base pointer of scalar loads and
2966// stores?
2967 static bool isWorthFoldingAdd(SDValue Add) {
2968 for (auto *User : Add->users()) {
2969 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
2970 User->getOpcode() != RISCVISD::LD_RV32 &&
2971 User->getOpcode() != RISCVISD::SD_RV32 &&
2972 User->getOpcode() != ISD::ATOMIC_LOAD &&
2973 User->getOpcode() != ISD::ATOMIC_STORE)
2974 return false;
2975 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
2976 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2977 VT != MVT::f64)
2978 return false;
2979 // Don't allow stores of the value. It must be used as the address.
2980 if (User->getOpcode() == ISD::STORE &&
2981 cast<StoreSDNode>(User)->getValue() == Add)
2982 return false;
2983 if (User->getOpcode() == ISD::ATOMIC_STORE &&
2984 cast<AtomicSDNode>(User)->getVal() == Add)
2985 return false;
2986 if (User->getOpcode() == RISCVISD::SD_RV32 &&
2987 (User->getOperand(0) == Add || User->getOperand(1) == Add))
2988 return false;
2989 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
2990 return false;
2991 }
2992
2993 return true;
2994}
2995
2996 static bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
2997 switch (User->getOpcode()) {
2998 default:
2999 return false;
3000 case ISD::LOAD:
3001 case RISCVISD::LD_RV32:
3002 case ISD::ATOMIC_LOAD:
3003 break;
3004 case ISD::STORE:
3005 // Don't allow stores of Add. It must only be used as the address.
3006 if (cast<StoreSDNode>(User)->getValue() == Add)
3007 return false;
3008 break;
3009 case RISCVISD::SD_RV32:
3010 // Don't allow stores of Add. It must only be used as the address.
3011 if (User->getOperand(0) == Add || User->getOperand(1) == Add)
3012 return false;
3013 break;
3014 case ISD::ATOMIC_STORE:
3015 // Don't allow stores of Add. It must only be used as the address.
3016 if (cast<AtomicSDNode>(User)->getVal() == Add)
3017 return false;
3018 break;
3019 }
3020
3021 return true;
3022}
3023
3024// To prevent SelectAddrRegImm from folding offsets that conflict with the
3025// fusion of PseudoMovAddr, check if the offset of every use of a given address
3026// is within the alignment.
3027 bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr,
3028 Align Alignment) {
3029 assert(Addr->getOpcode() == RISCVISD::ADD_LO);
3030 for (auto *User : Addr->users()) {
3031 // If the user is a load or store, then the offset is 0 which is always
3032 // within alignment.
3033 if (isRegImmLoadOrStore(User, Addr))
3034 continue;
3035
3036 if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
3037 int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3038 if (!isInt<12>(CVal) || Alignment <= CVal)
3039 return false;
3040
3041 // Make sure all uses are foldable load/stores.
3042 for (auto *AddUser : User->users())
3043 if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
3044 return false;
3045
3046 continue;
3047 }
3048
3049 return false;
3050 }
3051
3052 return true;
3053}
3054
3055 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
3056 SDValue &Offset) {
3057 if (SelectAddrFrameIndex(Addr, Base, Offset))
3058 return true;
3059
3060 SDLoc DL(Addr);
3061 MVT VT = Addr.getSimpleValueType();
3062
3063 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
3064 bool CanFold = true;
3065 // Unconditionally fold if operand 1 is not a global address (e.g. an
3066 // external symbol).
3067 if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
3068 const DataLayout &DL = CurDAG->getDataLayout();
3069 Align Alignment = commonAlignment(
3070 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3071 if (!areOffsetsWithinAlignment(Addr, Alignment))
3072 CanFold = false;
3073 }
3074 if (CanFold) {
3075 Base = Addr.getOperand(0);
3076 Offset = Addr.getOperand(1);
3077 return true;
3078 }
3079 }
3080
3081 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3082 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3083 if (isInt<12>(CVal)) {
3084 Base = Addr.getOperand(0);
3085 if (Base.getOpcode() == RISCVISD::ADD_LO) {
3086 SDValue LoOperand = Base.getOperand(1);
3087 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
3088 // If the Lo in (ADD_LO hi, lo) is a global variable's address
3089 // (its low part, really), then we can rely on the alignment of that
3090 // variable to provide a margin of safety before the low part can overflow
3091 // the 12 bits of the load/store offset. Check if CVal falls within
3092 // that margin; if so (low part + CVal) can't overflow.
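// For example, for an 8-byte-aligned global, the low part is a multiple of
// 8, so adding a CVal in [0, 7] still fits the signed 12-bit offset and
// never carries into the %hi portion.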
3093 const DataLayout &DL = CurDAG->getDataLayout();
3094 Align Alignment = commonAlignment(
3095 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
3096 if ((CVal == 0 || Alignment > CVal) &&
3097 areOffsetsWithinAlignment(Base, Alignment)) {
3098 int64_t CombinedOffset = CVal + GA->getOffset();
3099 Base = Base.getOperand(0);
3100 Offset = CurDAG->getTargetGlobalAddress(
3101 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
3102 CombinedOffset, GA->getTargetFlags());
3103 return true;
3104 }
3105 }
3106 }
3107
3108 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3109 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3110 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3111 return true;
3112 }
3113 }
3114
3115 // Handle ADD with large immediates.
3116 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3117 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3118 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3119
3120 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
3121 // an ADDI for part of the offset and fold the rest into the load/store.
3122 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
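// For example, CVal = 3000 becomes ADDI base, base, 2047 with a folded
// offset of 953; CVal = -3000 becomes ADDI base, base, -2048 with a folded
// offset of -952.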
3123 if (CVal >= -4096 && CVal <= 4094) {
3124 int64_t Adj = CVal < 0 ? -2048 : 2047;
3125 Base = SDValue(
3126 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
3127 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
3128 0);
3129 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
3130 return true;
3131 }
3132
3133 // For larger immediates, we might be able to save one instruction from
3134 // constant materialization by folding the Lo12 bits of the immediate into
3135 // the address. We should only do this if the ADD is only used by loads and
3136 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
3137 // separately with the full materialized immediate creating extra
3138 // instructions.
3139 if (isWorthFoldingAdd(Addr) &&
3140 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3141 Offset, /*IsPrefetch=*/false)) {
3142 // Insert an ADD instruction with the materialized Hi52 bits.
3143 Base = SDValue(
3144 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3145 0);
3146 return true;
3147 }
3148 }
3149
3150 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3151 /*IsPrefetch=*/false))
3152 return true;
3153
3154 Base = Addr;
3155 Offset = CurDAG->getTargetConstant(0, DL, VT);
3156 return true;
3157}
3158
3159/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
3161 SDValue &Offset) {
3162 if (SelectAddrFrameIndex(Addr, Base, Offset))
3163 return true;
3164
3165 SDLoc DL(Addr);
3166 MVT VT = Addr.getSimpleValueType();
3167
3168 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3169 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3170 if (isUInt<9>(CVal)) {
3171 Base = Addr.getOperand(0);
3172
3173 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3174 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3175 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3176 return true;
3177 }
3178 }
3179
3180 Base = Addr;
3181 Offset = CurDAG->getTargetConstant(0, DL, VT);
3182 return true;
3183}
3184
3185/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
3186/// Offset should be all zeros.
3187 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
3188 SDValue &Offset) {
3189 if (SelectAddrFrameIndex(Addr, Base, Offset))
3190 return true;
3191
3192 SDLoc DL(Addr);
3193 MVT VT = Addr.getSimpleValueType();
3194
3195 if (CurDAG->isBaseWithConstantOffset(Addr)) {
3196 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3197 if (isInt<12>(CVal)) {
3198 Base = Addr.getOperand(0);
3199
3200 // Early-out if not a valid offset.
3201 if ((CVal & 0b11111) != 0) {
3202 Base = Addr;
3203 Offset = CurDAG->getTargetConstant(0, DL, VT);
3204 return true;
3205 }
3206
3207 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
3208 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
3209 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
3210 return true;
3211 }
3212 }
3213
3214 // Handle ADD with large immediates.
3215 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
3216 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
3217 assert(!isInt<12>(CVal) && "simm12 not already handled?");
3218
3219 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
3220 // one instruction by folding an adjustment (-2048 or 2016) into the address.
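// For example, CVal = 2100 becomes ADDI base, base, 84 with a folded offset
// of 2016 (whose low 5 bits are zero, as these prefetch offsets require).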
3221 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
3222 int64_t Adj = CVal < 0 ? -2048 : 2016;
3223 int64_t AdjustedOffset = CVal - Adj;
3224 Base =
3225 SDValue(CurDAG->getMachineNode(
3226 RISCV::ADDI, DL, VT, Addr.getOperand(0),
3227 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
3228 0);
3229 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
3230 return true;
3231 }
3232
3233 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
3234 Offset, /*IsPrefetch=*/true)) {
3235 // Insert an ADD instruction with the materialized Hi52 bits.
3236 Base = SDValue(
3237 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
3238 0);
3239 return true;
3240 }
3241 }
3242
3243 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
3244 /*IsPrefetch=*/true))
3245 return true;
3246
3247 Base = Addr;
3248 Offset = CurDAG->getTargetConstant(0, DL, VT);
3249 return true;
3250}
3251
3252 /// Return true if this is a load/store for which we have a RegRegScale instruction.
3253 static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add,
3254 const RISCVSubtarget &Subtarget) {
3255 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE)
3256 return false;
3257 EVT VT = cast<MemSDNode>(User)->getMemoryVT();
3258 if (!(VT.isScalarInteger() &&
3259 (Subtarget.hasVendorXTHeadMemIdx() || Subtarget.hasVendorXqcisls())) &&
3260 !((VT == MVT::f32 || VT == MVT::f64) &&
3261 Subtarget.hasVendorXTHeadFMemIdx()))
3262 return false;
3263 // Don't allow stores of the value. It must be used as the address.
3264 if (User->getOpcode() == ISD::STORE &&
3265 cast<StoreSDNode>(User)->getValue() == Add)
3266 return false;
3267
3268 return true;
3269}
3270
3271/// Is it profitable to fold this Add into RegRegScale load/store. If \p
3272/// Shift is non-null, then we have matched a shl+add. We allow reassociating
3273/// (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) if there is a
3274/// single addi and we don't have a SHXADD instruction we could use.
3275/// FIXME: May still need to check how many and what kind of users the SHL has.
3276 static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget,
3277 SDValue Add,
3278 SDValue Shift = SDValue()) {
3279 bool FoundADDI = false;
3280 for (auto *User : Add->users()) {
3281 if (isRegRegScaleLoadOrStore(User, Add, Subtarget))
3282 continue;
3283
3284 // Allow a single ADDI that is used by loads/stores if we matched a shift.
3285 if (!Shift || FoundADDI || User->getOpcode() != ISD::ADD ||
3286 !isa<ConstantSDNode>(User->getOperand(1)) ||
3287 !isInt<12>(cast<ConstantSDNode>(User->getOperand(1))->getSExtValue()))
3288 return false;
3289
3290 FoundADDI = true;
3291
3292 // If we have a SHXADD instruction, prefer that over reassociating an ADDI.
3293 assert(Shift.getOpcode() == ISD::SHL);
3294 unsigned ShiftAmt = Shift.getConstantOperandVal(1);
3295 if (Subtarget.hasShlAdd(ShiftAmt))
3296 return false;
3297
3298 // All users of the ADDI should be load/store.
3299 for (auto *ADDIUser : User->users())
3300 if (!isRegRegScaleLoadOrStore(ADDIUser, SDValue(User, 0), Subtarget))
3301 return false;
3302 }
3303
3304 return true;
3305}
3306
3307bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
3308 unsigned MaxShiftAmount,
3309 SDValue &Base, SDValue &Index,
3310 SDValue &Scale) {
3311 if (Addr.getOpcode() != ISD::ADD)
3312 return false;
3313 SDValue LHS = Addr.getOperand(0);
3314 SDValue RHS = Addr.getOperand(1);
3315
3316 EVT VT = Addr.getSimpleValueType();
3317 auto SelectShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
3318 SDValue &Shift) {
3319 if (N.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(N.getOperand(1)))
3320 return false;
3321
3322 // Only match shifts by a value in range [0, MaxShiftAmount].
3323 unsigned ShiftAmt = N.getConstantOperandVal(1);
3324 if (ShiftAmt > MaxShiftAmount)
3325 return false;
3326
3327 Index = N.getOperand(0);
3328 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
3329 return true;
3330 };
3331
3332 if (auto *C1 = dyn_cast<ConstantSDNode>(RHS)) {
3333 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
3334 if (LHS.getOpcode() == ISD::ADD &&
3335 !isa<ConstantSDNode>(LHS.getOperand(1)) &&
3336 isInt<12>(C1->getSExtValue())) {
3337 if (SelectShl(LHS.getOperand(1), Index, Scale) &&
3338 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(1))) {
3339 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3340 SDLoc(Addr), VT);
3341 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3342 LHS.getOperand(0), C1Val),
3343 0);
3344 return true;
3345 }
3346
3347 // Add is commutative so we need to check both operands.
3348 if (SelectShl(LHS.getOperand(0), Index, Scale) &&
3349 isWorthFoldingIntoRegRegScale(*Subtarget, LHS, LHS.getOperand(0))) {
3350 SDValue C1Val = CurDAG->getTargetConstant(*C1->getConstantIntValue(),
3351 SDLoc(Addr), VT);
3352 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
3353 LHS.getOperand(1), C1Val),
3354 0);
3355 return true;
3356 }
3357 }
3358
3359 // Don't match add with constants.
3360 // FIXME: Is this profitable for large constants that have 0s in the lower
3361 // 12 bits that we can materialize with LUI?
3362 return false;
3363 }
3364
3365 // Try to match a shift on the RHS.
3366 if (SelectShl(RHS, Index, Scale)) {
3367 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, RHS))
3368 return false;
3369 Base = LHS;
3370 return true;
3371 }
3372
3373 // Try to match a shift on the LHS.
3374 if (SelectShl(LHS, Index, Scale)) {
3375 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr, LHS))
3376 return false;
3377 Base = RHS;
3378 return true;
3379 }
3380
3381 if (!isWorthFoldingIntoRegRegScale(*Subtarget, Addr))
3382 return false;
3383
3384 Base = LHS;
3385 Index = RHS;
3386 Scale = CurDAG->getTargetConstant(0, SDLoc(Addr), VT);
3387 return true;
3388}
3389
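// Like SelectAddrRegRegScale, but additionally requires the index to be
// zero-extended from \p Bits bits; the zero extension is matched as an AND
// with the mask (1 << Bits) - 1 and then stripped from the index.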
3390bool RISCVDAGToDAGISel::SelectAddrRegZextRegScale(SDValue Addr,
3391 unsigned MaxShiftAmount,
3392 unsigned Bits, SDValue &Base,
3393 SDValue &Index,
3394 SDValue &Scale) {
3395 if (!SelectAddrRegRegScale(Addr, MaxShiftAmount, Base, Index, Scale))
3396 return false;
3397
3398 if (Index.getOpcode() == ISD::AND) {
3399 auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
3400 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3401 Index = Index.getOperand(0);
3402 return true;
3403 }
3404 }
3405
3406 return false;
3407}
3408
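// Select a simple reg+reg address. A constant RHS is rejected here since
// reg+imm addressing handles that case.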
3409bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3410 SDValue &Offset) {
3411 if (Addr.getOpcode() != ISD::ADD)
3412 return false;
3413
3414 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3415 return false;
3416
3417 Base = Addr.getOperand(0);
3418 Offset = Addr.getOperand(1);
3419 return true;
3420}
3421
3422bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
3423 SDValue &ShAmt) {
3424 ShAmt = N;
3425
3426 // Peek through zext.
3427 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3428 ShAmt = ShAmt.getOperand(0);
3429
3430 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3431 // amount. If there is an AND on the shift amount, we can bypass it if it
3432 // doesn't affect any of those bits.
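// For example (illustrative): with ShiftWidth == 32, (and Y, 31) covers all
// bits SLL/SRL/SRA actually read, so the AND can be skipped and Y used
// directly as the shift amount.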
3433 if (ShAmt.getOpcode() == ISD::AND &&
3434 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3435 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
3436
3437 // Since the max shift amount is a power of 2 we can subtract 1 to make a
3438 // mask that covers the bits needed to represent all shift amounts.
3439 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
3440 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
3441
3442 if (ShMask.isSubsetOf(AndMask)) {
3443 ShAmt = ShAmt.getOperand(0);
3444 } else {
3445 // SimplifyDemandedBits may have optimized the mask so try restoring any
3446 // bits that are known zero.
3447 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
3448 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
3449 return true;
3450 ShAmt = ShAmt.getOperand(0);
3451 }
3452 }
3453
3454 if (ShAmt.getOpcode() == ISD::ADD &&
3455 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
3456 uint64_t Imm = ShAmt.getConstantOperandVal(1);
3457 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
3458 // to avoid the ADD.
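// e.g. (add Y, 64) with ShiftWidth == 32 shifts by the same amount as Y
// alone, since only the low 5 bits of the amount are read.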
3459 if (Imm != 0 && Imm % ShiftWidth == 0) {
3460 ShAmt = ShAmt.getOperand(0);
3461 return true;
3462 }
3463 } else if (ShAmt.getOpcode() == ISD::SUB &&
3464 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
3465 uint64_t Imm = ShAmt.getConstantOperandVal(0);
3466 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
3467 // generate a NEG instead of a SUB of a constant.
3468 if (Imm != 0 && Imm % ShiftWidth == 0) {
3469 SDLoc DL(ShAmt);
3470 EVT VT = ShAmt.getValueType();
3471 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
3472 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
3473 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
3474 ShAmt.getOperand(1));
3475 ShAmt = SDValue(Neg, 0);
3476 return true;
3477 }
3478 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3479 // to generate a NOT instead of a SUB of a constant.
3480 if (Imm % ShiftWidth == ShiftWidth - 1) {
3481 SDLoc DL(ShAmt);
3482 EVT VT = ShAmt.getValueType();
3483 MachineSDNode *Not = CurDAG->getMachineNode(
3484 RISCV::XORI, DL, VT, ShAmt.getOperand(1),
3485 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
3486 ShAmt = SDValue(Not, 0);
3487 return true;
3488 }
3489 }
3490
3491 return true;
3492}
3493
3494/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3495/// check for equality with 0. This function emits instructions that convert the
3496/// seteq/setne into something that can be compared with 0.
3497/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3498/// ISD::SETNE).
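/// For example (illustrative): for (setne X, 5) this returns (ADDI X, -5) in
/// \p Val, which is zero exactly when X == 5 and non-zero otherwise.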
3499bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
3500 SDValue &Val) {
3501 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
3502 "Unexpected condition code!");
3503
3504 // We're looking for a setcc.
3505 if (N->getOpcode() != ISD::SETCC)
3506 return false;
3507
3508 // Must be an equality comparison.
3509 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
3510 if (CCVal != ExpectedCCVal)
3511 return false;
3512
3513 SDValue LHS = N->getOperand(0);
3514 SDValue RHS = N->getOperand(1);
3515
3516 if (!LHS.getValueType().isScalarInteger())
3517 return false;
3518
3519 // If the RHS is 0, we don't need any extra instructions; return the LHS.
3520 if (isNullConstant(RHS)) {
3521 Val = LHS;
3522 return true;
3523 }
3524
3525 SDLoc DL(N);
3526
3527 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
3528 int64_t CVal = C->getSExtValue();
3529 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
3530 // non-zero otherwise.
3531 if (CVal == -2048) {
3532 Val = SDValue(
3533 CurDAG->getMachineNode(
3534 RISCV::XORI, DL, N->getValueType(0), LHS,
3535 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
3536 0);
3537 return true;
3538 }
3539 // If the RHS is [-2047,2048], we can use addi/addiw with -RHS to produce 0
3540 // if the LHS is equal to the RHS and non-zero otherwise.
3541 if (isInt<12>(CVal) || CVal == 2048) {
3542 unsigned Opc = RISCV::ADDI;
3543 if (LHS.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3544 cast<VTSDNode>(LHS.getOperand(1))->getVT() == MVT::i32) {
3545 Opc = RISCV::ADDIW;
3546 LHS = LHS.getOperand(0);
3547 }
3548
3549 Val = SDValue(CurDAG->getMachineNode(Opc, DL, N->getValueType(0), LHS,
3550 CurDAG->getSignedTargetConstant(
3551 -CVal, DL, N->getValueType(0))),
3552 0);
3553 return true;
3554 }
3555 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
3556 Val = SDValue(
3557 CurDAG->getMachineNode(
3558 RISCV::BINVI, DL, N->getValueType(0), LHS,
3559 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
3560 0);
3561 return true;
3562 }
3563 // Same as the addi case above but for larger immediates (signed 26-bit) use
3564 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
3565 // anything which can be done with a single lui as it might be compressible.
3566 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
3567 (CVal & 0xFFF) != 0) {
3568 Val = SDValue(
3569 CurDAG->getMachineNode(
3570 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
3571 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
3572 0);
3573 return true;
3574 }
3575 }
3576
3577 // If nothing else matched, we can XOR the LHS and RHS to produce zero if they are
3578 // equal and a non-zero value if they aren't.
3579 Val = SDValue(
3580 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
3581 return true;
3582}
3583
3584bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3585 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3586 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
3587 Val = N.getOperand(0);
3588 return true;
3589 }
3590
3591 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
3592 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
3593 return N;
3594
3595 SDValue N0 = N.getOperand(0);
3596 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3597 N.getConstantOperandVal(1) == ShiftAmt &&
3598 N0.getConstantOperandVal(1) == ShiftAmt)
3599 return N0.getOperand(0);
3600
3601 return N;
3602 };
3603
3604 MVT VT = N.getSimpleValueType();
3605 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
3606 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
3607 return true;
3608 }
3609
3610 return false;
3611}
3612
3613bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3614 if (N.getOpcode() == ISD::AND) {
3615 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3616 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3617 Val = N.getOperand(0);
3618 return true;
3619 }
3620 }
3621 MVT VT = N.getSimpleValueType();
3622 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3623 if (CurDAG->MaskedValueIsZero(N, Mask)) {
3624 Val = N;
3625 return true;
3626 }
3627
3628 return false;
3629}
3630
3631/// Look for various patterns that can be done with a SHL that can be folded
3632/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3633/// SHXADD we are trying to match.
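/// For example (illustrative, RV64): with ShAmt == 3,
/// (and (shl Y, 1), 0xFFFFFFFFFFFFFFF8) equals ((srl Y, 2) << 3), so we
/// return (srli Y, 2) and the surrounding add can then be selected as SH3ADD.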
3634bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
3635 SDValue &Val) {
3636 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
3637 SDValue N0 = N.getOperand(0);
3638
3639 if (bool LeftShift = N0.getOpcode() == ISD::SHL;
3640 (LeftShift || N0.getOpcode() == ISD::SRL) &&
3641 isa<ConstantSDNode>(N0.getOperand(1))) {
3642 uint64_t Mask = N.getConstantOperandVal(1);
3643 unsigned C2 = N0.getConstantOperandVal(1);
3644
3645 unsigned XLen = Subtarget->getXLen();
3646 if (LeftShift)
3647 Mask &= maskTrailingZeros<uint64_t>(C2);
3648 else
3649 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
3650
3651 if (isShiftedMask_64(Mask)) {
3652 unsigned Leading = XLen - llvm::bit_width(Mask);
3653 unsigned Trailing = llvm::countr_zero(Mask);
3654 if (Trailing != ShAmt)
3655 return false;
3656
3657 unsigned Opcode;
3658 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
3659 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
3660 // followed by a SHXADD with c3 for the X amount.
3661 if (LeftShift && Leading == 0 && C2 < Trailing)
3662 Opcode = RISCV::SRLI;
3663 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
3664 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
3665 // followed by a SHXADD with c3 for the X amount.
3666 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
3667 Opcode = RISCV::SRLIW;
3668 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
3669 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
3670 // followed by a SHXADD using c3 for the X amount.
3671 else if (!LeftShift && Leading == C2)
3672 Opcode = RISCV::SRLI;
3673 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
3674 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
3675 // followed by a SHXADD using c3 for the X amount.
3676 else if (!LeftShift && Leading == 32 + C2)
3677 Opcode = RISCV::SRLIW;
3678 else
3679 return false;
3680
3681 SDLoc DL(N);
3682 EVT VT = N.getValueType();
3683 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
3684 Val = SDValue(
3685 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
3686 CurDAG->getTargetConstant(ShAmt, DL, VT)),
3687 0);
3688 return true;
3689 }
3690 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
3691 isa<ConstantSDNode>(N0.getOperand(1))) {
3692 uint64_t Mask = N.getConstantOperandVal(1);
3693 unsigned C2 = N0.getConstantOperandVal(1);
3694
3695 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
3696 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
3697 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
3698 // the X amount.
3699 if (isShiftedMask_64(Mask)) {
3700 unsigned XLen = Subtarget->getXLen();
3701 unsigned Leading = XLen - llvm::bit_width(Mask);
3702 unsigned Trailing = llvm::countr_zero(Mask);
3703 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
3704 SDLoc DL(N);
3705 EVT VT = N.getValueType();
3706 Val = SDValue(CurDAG->getMachineNode(
3707 RISCV::SRAI, DL, VT, N0.getOperand(0),
3708 CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
3709 0);
3710 Val = SDValue(CurDAG->getMachineNode(
3711 RISCV::SRLI, DL, VT, Val,
3712 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
3713 0);
3714 return true;
3715 }
3716 }
3717 }
3718 } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
3719 (LeftShift || N.getOpcode() == ISD::SRL) &&
3720 isa<ConstantSDNode>(N.getOperand(1))) {
3721 SDValue N0 = N.getOperand(0);
3722 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
3723 isa<ConstantSDNode>(N0.getOperand(1))) {
3724 uint64_t Mask = N0.getConstantOperandVal(1);
3725 if (isShiftedMask_64(Mask)) {
3726 unsigned C1 = N.getConstantOperandVal(1);
3727 unsigned XLen = Subtarget->getXLen();
3728 unsigned Leading = XLen - llvm::bit_width(Mask);
3729 unsigned Trailing = llvm::countr_zero(Mask);
3730 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
3731 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
3732 if (LeftShift && Leading == 32 && Trailing > 0 &&
3733 (Trailing + C1) == ShAmt) {
3734 SDLoc DL(N);
3735 EVT VT = N.getValueType();
3736 Val = SDValue(CurDAG->getMachineNode(
3737 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3738 CurDAG->getTargetConstant(Trailing, DL, VT)),
3739 0);
3740 return true;
3741 }
3742 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3743 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3744 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3745 (Trailing - C1) == ShAmt) {
3746 SDLoc DL(N);
3747 EVT VT = N.getValueType();
3748 Val = SDValue(CurDAG->getMachineNode(
3749 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3750 CurDAG->getTargetConstant(Trailing, DL, VT)),
3751 0);
3752 return true;
3753 }
3754 }
3755 }
3756 }
3757
3758 return false;
3759}
3760
3761/// Look for various patterns that can be done with a SHL that can be folded
3762/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3763/// SHXADD_UW we are trying to match.
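/// For example (illustrative, RV64): with ShAmt == 1,
/// (and (shl Y, 2), 0x1FFFFFFFC) equals the low 32 bits of (shl Y, 1), zero
/// extended and shifted left by 1, so we return (slli Y, 1) for SH1ADD_UW.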
3764bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3765 SDValue &Val) {
3766 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3767 N.hasOneUse()) {
3768 SDValue N0 = N.getOperand(0);
3769 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3770 N0.hasOneUse()) {
3771 uint64_t Mask = N.getConstantOperandVal(1);
3772 unsigned C2 = N0.getConstantOperandVal(1);
3773
3774 Mask &= maskTrailingZeros<uint64_t>(C2);
3775
3776 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3777 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3778 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3779 if (isShiftedMask_64(Mask)) {
3780 unsigned Leading = llvm::countl_zero(Mask);
3781 unsigned Trailing = llvm::countr_zero(Mask);
3782 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3783 SDLoc DL(N);
3784 EVT VT = N.getValueType();
3785 Val = SDValue(CurDAG->getMachineNode(
3786 RISCV::SLLI, DL, VT, N0.getOperand(0),
3787 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3788 0);
3789 return true;
3790 }
3791 }
3792 }
3793 }
3794
3795 return false;
3796}
3797
3798bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
3799 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
3800 if (N->getFlags().hasDisjoint())
3801 return true;
3802 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
3803}
3804
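// Materialize \p Imm in place of \p OrigImm when \p Imm's materialization
// sequence is strictly cheaper; on success the selected constant is returned
// in \p Val.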
3805bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
3806 SDValue N, SDValue &Val) {
3807 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
3808 /*CompressionCost=*/true);
3809 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3810 /*CompressionCost=*/true);
3811 if (OrigCost <= Cost)
3812 return false;
3813
3814 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
3815 return true;
3816}
3817
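// Match a constant whose low 32 bits have bit 31 set and whose high bits are
// zero. If every user is an add or a disjoint or, try materializing the
// sign-extended form (high 32 bits all ones) instead when that is cheaper.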
3818bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
3819 if (!isa<ConstantSDNode>(N))
3820 return false;
3821 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3822 if ((Imm >> 31) != 1)
3823 return false;
3824
3825 for (const SDNode *U : N->users()) {
3826 switch (U->getOpcode()) {
3827 case ISD::ADD:
3828 break;
3829 case ISD::OR:
3830 if (orDisjoint(U))
3831 break;
3832 return false;
3833 default:
3834 return false;
3835 }
3836 }
3837
3838 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
3839}
3840
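// Select -Imm when it is cheaper to materialize than Imm. Only constants
// whose users are all adds (directly, or through a VMV_V_X_VL splat that only
// feeds adds) are considered, so the negation can be absorbed by the users.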
3841bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
3842 if (!isa<ConstantSDNode>(N))
3843 return false;
3844 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3845 if (isInt<32>(Imm))
3846 return false;
3847
3848 for (const SDNode *U : N->users()) {
3849 switch (U->getOpcode()) {
3850 case ISD::ADD:
3851 break;
3852 case RISCVISD::VMV_V_X_VL:
3853 if (!all_of(U->users(), [](const SDNode *V) {
3854 return V->getOpcode() == ISD::ADD ||
3855 V->getOpcode() == RISCVISD::ADD_VL;
3856 }))
3857 return false;
3858 break;
3859 default:
3860 return false;
3861 }
3862 }
3863
3864 return selectImm64IfCheaper(-Imm, Imm, N, Val);
3865}
3866
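// Select the bitwise-inverted constant ~Imm for use with ANDN/ORN/XNOR (or
// vector vandn) patterns, provided every user is a logic op that can absorb
// the inversion and the inverted form is no more expensive to materialize.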
3867bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
3868 if (!isa<ConstantSDNode>(N))
3869 return false;
3870 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3871
3872 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
3873 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
3874 return false;
3875
3876 // Abandon this transform if the constant is needed elsewhere.
3877 for (const SDNode *U : N->users()) {
3878 switch (U->getOpcode()) {
3879 case ISD::AND:
3880 case ISD::OR:
3881 case ISD::XOR:
3882 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
3883 return false;
3884 break;
3885 case RISCVISD::VMV_V_X_VL:
3886 if (!Subtarget->hasStdExtZvkb())
3887 return false;
3888 if (!all_of(U->users(), [](const SDNode *V) {
3889 return V->getOpcode() == ISD::AND ||
3890 V->getOpcode() == RISCVISD::AND_VL;
3891 }))
3892 return false;
3893 break;
3894 default:
3895 return false;
3896 }
3897 }
3898
3899 if (isInt<32>(Imm)) {
3900 Val =
3901 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
3902 return true;
3903 }
3904
3905 // For 64-bit constants, the instruction sequences get complex,
3906 // so we select inverted only if it's cheaper.
3907 return selectImm64IfCheaper(~Imm, Imm, N, Val);
3908}
3909
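// Return true if the scalar operand \p UserOpNo of the RVV pseudo \p User
// demands no more than the low \p Bits bits, e.g. the scalar operand of a
// vadd.vx demands only SEW bits.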
3910static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3911 unsigned Bits,
3912 const TargetInstrInfo *TII) {
3913 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3914
3915 if (!MCOpcode)
3916 return false;
3917
3918 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3919 const uint64_t TSFlags = MCID.TSFlags;
3920 if (!RISCVII::hasSEWOp(TSFlags))
3921 return false;
3922 assert(RISCVII::hasVLOp(TSFlags));
3923
3924 unsigned ChainOpIdx = User->getNumOperands() - 1;
3925 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3926 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3927 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
3928 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3929
3930 if (UserOpNo == VLIdx)
3931 return false;
3932
3933 auto NumDemandedBits =
3934 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3935 return NumDemandedBits && Bits >= *NumDemandedBits;
3936}
3937
3938// Return true if all users of this SDNode* only consume the lower \p Bits.
3939// This can be used to form W instructions for add/sub/mul/shl even when the
3940// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3941// SimplifyDemandedBits has made it so some users see a sext_inreg and some
3942// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3943// the add/sub/mul/shl to become non-W instructions. By checking the users we
3944// may be able to use a W instruction and CSE with the other instruction if
3945// this has happened. We could try to detect that the CSE opportunity exists
3946// before doing this, but that would be more complicated.
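// For example (illustrative): if every user of an ADD is an ADDW or a SW that
// stores the value, only the low 32 bits are observed, so
// hasAllNBitUsers(Node, 32) returns true and the ADD itself can be selected
// as ADDW.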
3947bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3948 const unsigned Depth) const {
3949 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3950 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3951 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3952 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3953 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3954 isa<ConstantSDNode>(Node) || Depth != 0) &&
3955 "Unexpected opcode");
3956
3957 if (Depth >= SelectionDAG::MaxRecursionDepth)
3958 return false;
3959
3960 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3961 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3962 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3963 return false;
3964
3965 for (SDUse &Use : Node->uses()) {
3966 SDNode *User = Use.getUser();
3967 // Users of this node should have already been instruction selected
3968 if (!User->isMachineOpcode())
3969 return false;
3970
3971 // TODO: Add more opcodes?
3972 switch (User->getMachineOpcode()) {
3973 default:
3974 if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
3975 break;
3976 return false;
3977 case RISCV::ADDW:
3978 case RISCV::ADDIW:
3979 case RISCV::SUBW:
3980 case RISCV::MULW:
3981 case RISCV::SLLW:
3982 case RISCV::SLLIW:
3983 case RISCV::SRAW:
3984 case RISCV::SRAIW:
3985 case RISCV::SRLW:
3986 case RISCV::SRLIW:
3987 case RISCV::DIVW:
3988 case RISCV::DIVUW:
3989 case RISCV::REMW:
3990 case RISCV::REMUW:
3991 case RISCV::ROLW:
3992 case RISCV::RORW:
3993 case RISCV::RORIW:
3994 case RISCV::CLZW:
3995 case RISCV::CTZW:
3996 case RISCV::CPOPW:
3997 case RISCV::SLLI_UW:
3998 case RISCV::FMV_W_X:
3999 case RISCV::FCVT_H_W:
4000 case RISCV::FCVT_H_W_INX:
4001 case RISCV::FCVT_H_WU:
4002 case RISCV::FCVT_H_WU_INX:
4003 case RISCV::FCVT_S_W:
4004 case RISCV::FCVT_S_W_INX:
4005 case RISCV::FCVT_S_WU:
4006 case RISCV::FCVT_S_WU_INX:
4007 case RISCV::FCVT_D_W:
4008 case RISCV::FCVT_D_W_INX:
4009 case RISCV::FCVT_D_WU:
4010 case RISCV::FCVT_D_WU_INX:
4011 case RISCV::TH_REVW:
4012 case RISCV::TH_SRRIW:
4013 if (Bits >= 32)
4014 break;
4015 return false;
4016 case RISCV::SLL:
4017 case RISCV::SRA:
4018 case RISCV::SRL:
4019 case RISCV::ROL:
4020 case RISCV::ROR:
4021 case RISCV::BSET:
4022 case RISCV::BCLR:
4023 case RISCV::BINV:
4024 // Shift amount operands only use log2(Xlen) bits.
4025 if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
4026 break;
4027 return false;
4028 case RISCV::SLLI:
4029 // SLLI only uses the lower (XLen - ShAmt) bits.
4030 if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
4031 break;
4032 return false;
4033 case RISCV::ANDI:
4034 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
4035 break;
4036 goto RecCheck;
4037 case RISCV::ORI: {
4038 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
4039 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
4040 break;
4041 [[fallthrough]];
4042 }
4043 case RISCV::AND:
4044 case RISCV::OR:
4045 case RISCV::XOR:
4046 case RISCV::XORI:
4047 case RISCV::ANDN:
4048 case RISCV::ORN:
4049 case RISCV::XNOR:
4050 case RISCV::SH1ADD:
4051 case RISCV::SH2ADD:
4052 case RISCV::SH3ADD:
4053 RecCheck:
4054 if (hasAllNBitUsers(User, Bits, Depth + 1))
4055 break;
4056 return false;
4057 case RISCV::SRLI: {
4058 unsigned ShAmt = User->getConstantOperandVal(1);
4059 // If we are shifting right by less than Bits, and users don't demand any
4060 // bits that were shifted into [Bits-1:0], then we can consider this as an
4061 // N-Bit user.
4062 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
4063 break;
4064 return false;
4065 }
4066 case RISCV::SEXT_B:
4067 case RISCV::PACKH:
4068 if (Bits >= 8)
4069 break;
4070 return false;
4071 case RISCV::SEXT_H:
4072 case RISCV::FMV_H_X:
4073 case RISCV::ZEXT_H_RV32:
4074 case RISCV::ZEXT_H_RV64:
4075 case RISCV::PACKW:
4076 if (Bits >= 16)
4077 break;
4078 return false;
4079 case RISCV::PACK:
4080 if (Bits >= (Subtarget->getXLen() / 2))
4081 break;
4082 return false;
4083 case RISCV::ADD_UW:
4084 case RISCV::SH1ADD_UW:
4085 case RISCV::SH2ADD_UW:
4086 case RISCV::SH3ADD_UW:
4087 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
4088 // 32 bits.
4089 if (Use.getOperandNo() == 0 && Bits >= 32)
4090 break;
4091 return false;
4092 case RISCV::SB:
4093 if (Use.getOperandNo() == 0 && Bits >= 8)
4094 break;
4095 return false;
4096 case RISCV::SH:
4097 if (Use.getOperandNo() == 0 && Bits >= 16)
4098 break;
4099 return false;
4100 case RISCV::SW:
4101 if (Use.getOperandNo() == 0 && Bits >= 32)
4102 break;
4103 return false;
4104 case RISCV::TH_EXT:
4105 case RISCV::TH_EXTU: {
4106 unsigned Msb = User->getConstantOperandVal(1);
4107 unsigned Lsb = User->getConstantOperandVal(2);
4108 // Behavior of Msb < Lsb is not well documented.
4109 if (Msb >= Lsb && Bits > Msb)
4110 break;
4111 return false;
4112 }
4113 }
4114 }
4115
4116 return true;
4117}
4118
4119// Select a constant that can be represented as (sign_extend(imm5) << imm2).
4120bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
4121 SDValue &Shl2) {
4122 auto *C = dyn_cast<ConstantSDNode>(N);
4123 if (!C)
4124 return false;
4125
4126 int64_t Offset = C->getSExtValue();
4127 for (unsigned Shift = 0; Shift < 4; Shift++) {
4128 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
4129 EVT VT = N->getValueType(0);
4130 Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
4131 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
4132 return true;
4133 }
4134 }
4135
4136 return false;
4137}
4138
4139// Select VL as a 5 bit immediate or a value that will become a register. This
4140// allows us to choose between VSETIVLI or VSETVLI later.
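// For example (illustrative): a constant VL of 7 stays a uimm5 immediate
// (later selectable as VSETIVLI), while an all-ones constant or the X0
// register becomes VLMaxSentinel and is treated as VLMAX by the vsetvli
// insertion pass.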
4141bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
4142 auto *C = dyn_cast<ConstantSDNode>(N);
4143 if (C && isUInt<5>(C->getZExtValue())) {
4144 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
4145 N->getValueType(0));
4146 } else if (C && C->isAllOnes()) {
4147 // Treat all ones as VLMax.
4148 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4149 N->getValueType(0));
4150 } else if (isa<RegisterSDNode>(N) &&
4151 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
4152 // All our VL operands use an operand that allows GPRNoX0 or an immediate
4153 // as the register class. Convert X0 to a special immediate to pass the
4154 // MachineVerifier. This is recognized specially by the vsetvli insertion
4155 // pass.
4156 VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
4157 N->getValueType(0));
4158 } else {
4159 VL = N;
4160 }
4161
4162 return true;
4163}
4164
4165static SDValue findVSplat(SDValue N) {
4166 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
4167 if (!N.getOperand(0).isUndef())
4168 return SDValue();
4169 N = N.getOperand(1);
4170 }
4171 SDValue Splat = N;
4172 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
4173 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
4174 !Splat.getOperand(0).isUndef())
4175 return SDValue();
4176 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
4177 return Splat;
4178}
4179
4180bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
4181 SDValue Splat = findVSplat(N);
4182 if (!Splat)
4183 return false;
4184
4185 SplatVal = Splat.getOperand(1);
4186 return true;
4187}
4188
4189static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
4190 SelectionDAG &DAG,
4191 const RISCVSubtarget &Subtarget,
4192 std::function<bool(int64_t)> ValidateImm,
4193 bool Decrement = false) {
4194 SDValue Splat = findVSplat(N);
4195 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
4196 return false;
4197
4198 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
4199 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
4200 "Unexpected splat operand type");
4201
4202 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
4203 // type is wider than the resulting vector element type, an implicit
4204 // truncation first takes place.
4205 // truncation/sign-extension in order to ignore any truncated bits and catch
4206 // any zero-extended immediate.
4207 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
4208 // sign-extending to (XLenVT -1).
4209 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
4210
4211 int64_t SplatImm = SplatConst.getSExtValue();
4212
4213 if (!ValidateImm(SplatImm))
4214 return false;
4215
4216 if (Decrement)
4217 SplatImm -= 1;
4218
4219 SplatVal =
4220 DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
4221 return true;
4222}
4223
4224bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
4225 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
4226 [](int64_t Imm) { return isInt<5>(Imm); });
4227}
4228
4229bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
4230 return selectVSplatImmHelper(
4231 N, SplatVal, *CurDAG, *Subtarget,
4232 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
4233 /*Decrement=*/true);
4234}
4235
4236bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
4237 return selectVSplatImmHelper(
4238 N, SplatVal, *CurDAG, *Subtarget,
4239 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
4240 /*Decrement=*/false);
4241}
4242
4243bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
4244 SDValue &SplatVal) {
4245 return selectVSplatImmHelper(
4246 N, SplatVal, *CurDAG, *Subtarget,
4247 [](int64_t Imm) {
4248 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
4249 },
4250 /*Decrement=*/true);
4251}
4252
4253bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
4254 SDValue &SplatVal) {
4255 return selectVSplatImmHelper(
4256 N, SplatVal, *CurDAG, *Subtarget,
4257 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
4258}
4259
4260bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
4261 SDValue Splat = findVSplat(N);
4262 return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
4263}
4264
4265bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
4266 auto IsExtOrTrunc = [](SDValue N) {
4267 switch (N->getOpcode()) {
4268 case ISD::SIGN_EXTEND:
4269 case ISD::ZERO_EXTEND:
4270 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
4271 // inactive elements will be undef.
4272 case RISCVISD::TRUNCATE_VECTOR_VL:
4273 case RISCVISD::VSEXT_VL:
4274 case RISCVISD::VZEXT_VL:
4275 return true;
4276 default:
4277 return false;
4278 }
4279 };
4280
4281 // We can have multiple nested nodes, so unravel them all if needed.
4282 while (IsExtOrTrunc(N)) {
4283 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
4284 return false;
4285 N = N->getOperand(0);
4286 }
4287
4288 return selectVSplat(N, SplatVal);
4289}
4290
4291bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
4292 // Allow bitcasts from XLenVT -> FP.
4293 if (N.getOpcode() == ISD::BITCAST &&
4294 N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
4295 Imm = N.getOperand(0);
4296 return true;
4297 }
4298 // Allow moves from XLenVT to FP.
4299 if (N.getOpcode() == RISCVISD::FMV_H_X ||
4300 N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
4301 Imm = N.getOperand(0);
4302 return true;
4303 }
4304
4305 // Otherwise, look for FP constants that can be materialized with a scalar int.
4306 auto *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
4307 if (!CFP)
4308 return false;
4309 const APFloat &APF = CFP->getValueAPF();
4310 // td can handle +0.0 already.
4311 if (APF.isPosZero())
4312 return false;
4313
4314 MVT VT = CFP->getSimpleValueType(0);
4315
4316 MVT XLenVT = Subtarget->getXLenVT();
4317 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
4318 assert(APF.isNegZero() && "Unexpected constant.");
4319 return false;
4320 }
4321 SDLoc DL(N);
4322 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
4323 *Subtarget);
4324 return true;
4325}
4326
4327bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
4328 SDValue &Imm) {
4329 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
4330 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
4331
4332 if (!isInt<5>(ImmVal))
4333 return false;
4334
4335 Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
4336 Subtarget->getXLenVT());
4337 return true;
4338 }
4339
4340 return false;
4341}
4342
4343// Try to remove sext.w if the input is a W instruction or can be made into
4344// a W instruction cheaply.
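// For example (illustrative): (ADDIW (ADD X, Y), 0) becomes (ADDW X, Y), and
// (ADDIW (ADDW X, Y), 0) is replaced by the ADDW directly.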
4345bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
4346 // Look for the sext.w pattern, addiw rd, rs1, 0.
4347 if (N->getMachineOpcode() != RISCV::ADDIW ||
4348 !isNullConstant(N->getOperand(1)))
4349 return false;
4350
4351 SDValue N0 = N->getOperand(0);
4352 if (!N0.isMachineOpcode())
4353 return false;
4354
4355 switch (N0.getMachineOpcode()) {
4356 default:
4357 break;
4358 case RISCV::ADD:
4359 case RISCV::ADDI:
4360 case RISCV::SUB:
4361 case RISCV::MUL:
4362 case RISCV::SLLI: {
4363 // Convert sext.w+add/sub/mul to their W instructions. This will create
4364 // a new independent instruction. This improves latency.
4365 unsigned Opc;
4366 switch (N0.getMachineOpcode()) {
4367 default:
4368 llvm_unreachable("Unexpected opcode!");
4369 case RISCV::ADD: Opc = RISCV::ADDW; break;
4370 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
4371 case RISCV::SUB: Opc = RISCV::SUBW; break;
4372 case RISCV::MUL: Opc = RISCV::MULW; break;
4373 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
4374 }
4375
4376 SDValue N00 = N0.getOperand(0);
4377 SDValue N01 = N0.getOperand(1);
4378
4379 // Shift amount needs to be uimm5.
4380 if (N0.getMachineOpcode() == RISCV::SLLI &&
4381 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
4382 break;
4383
4384 SDNode *Result =
4385 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
4386 N00, N01);
4387 ReplaceUses(N, Result);
4388 return true;
4389 }
4390 case RISCV::ADDW:
4391 case RISCV::ADDIW:
4392 case RISCV::SUBW:
4393 case RISCV::MULW:
4394 case RISCV::SLLIW:
4395 case RISCV::PACKW:
4396 case RISCV::TH_MULAW:
4397 case RISCV::TH_MULAH:
4398 case RISCV::TH_MULSW:
4399 case RISCV::TH_MULSH:
4400 if (N0.getValueType() == MVT::i32)
4401 break;
4402
4403 // Result is already sign extended; just remove the sext.w.
4404 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
4405 ReplaceUses(N, N0.getNode());
4406 return true;
4407 }
4408
4409 return false;
4410}
4411
4412static bool usesAllOnesMask(SDValue MaskOp) {
4413 const auto IsVMSet = [](unsigned Opc) {
4414 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4415 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4416 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4417 Opc == RISCV::PseudoVMSET_M_B8;
4418 };
4419
4420 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4421 // undefined behaviour if it's the wrong bitwidth, so we could choose to
4422 // assume that it's all-ones? Same applies to its VL.
4423 return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4424}
4425
4426static bool isImplicitDef(SDValue V) {
4427 if (!V.isMachineOpcode())
4428 return false;
4429 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4430 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4431 if (!isImplicitDef(V.getOperand(I)))
4432 return false;
4433 return true;
4434 }
4435 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4436}
4437
4438// Optimize masked RVV pseudo instructions with a known all-ones mask to their
4439// corresponding "unmasked" pseudo versions.
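// For example (illustrative): a PseudoVADD_VV_*_MASK whose mask operand is
// produced by PseudoVMSET_M_B* is rewritten to the unmasked PseudoVADD_VV_*
// form, dropping the mask (and, if the unmasked form lacks them, the passthru
// and policy operands).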
4440bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
4441 const RISCV::RISCVMaskedPseudoInfo *I =
4442 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
4443 if (!I)
4444 return false;
4445
4446 unsigned MaskOpIdx = I->MaskOpIdx;
4447 if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
4448 return false;
4449
4450 // There are two classes of pseudos in the table - compares and
4451 // everything else. See the comment on RISCVMaskedPseudo for details.
4452 const unsigned Opc = I->UnmaskedPseudo;
4453 const MCInstrDesc &MCID = TII->get(Opc);
4454 const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);
4455
4456 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
4457 const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);
4458
4459 assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
4461 "Unmasked pseudo has policy but masked pseudo doesn't?");
4462 assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
4463 "Unexpected pseudo structure");
4464 assert(!(HasPassthru && !MaskedHasPassthru) &&
4465 "Unmasked pseudo has passthru but masked pseudo doesn't?");
4466
4467 SmallVector<SDValue, 8> Ops;
4468 // Skip the passthru operand at index 0 if the unmasked pseudo doesn't have one.
4469 bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
4470 bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
4471 RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
4472 bool HasChainOp =
4473 N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
4474 unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
4475 for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
4476 // Skip the mask
4477 SDValue Op = N->getOperand(I);
4478 if (I == MaskOpIdx)
4479 continue;
4480 if (DropPolicy && I == LastOpNum)
4481 continue;
4482 Ops.push_back(Op);
4483 }
4484
4485 MachineSDNode *Result =
4486 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4487
4488 if (!N->memoperands_empty())
4489 CurDAG->setNodeMemRefs(Result, N->memoperands());
4490
4491 Result->setFlags(N->getFlags());
4492 ReplaceUses(N, Result);
4493
4494 return true;
4495}
4496
4497/// If our passthru is an implicit_def, use noreg instead. This sidesteps
4498/// issues with MachineCSE not being able to CSE expressions with
4499/// IMPLICIT_DEF operands while preserving the semantic intent. See
4500/// pr64282 for context. Note that this transform is the last one
4501/// performed at ISEL DAG to DAG.
4502bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
4503 bool MadeChange = false;
4504 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
4505
4506 while (Position != CurDAG->allnodes_begin()) {
4507 SDNode *N = &*--Position;
4508 if (N->use_empty() || !N->isMachineOpcode())
4509 continue;
4510
4511 const unsigned Opc = N->getMachineOpcode();
4512 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
4513 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
4514 !isImplicitDef(N->getOperand(0)))
4515 continue;
4516
4517 SmallVector<SDValue, 8> Ops;
4518 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
4519 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
4520 SDValue Op = N->getOperand(I);
4521 Ops.push_back(Op);
4522 }
4523
4524 MachineSDNode *Result =
4525 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4526 Result->setFlags(N->getFlags());
4527 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
4528 ReplaceUses(N, Result);
4529 MadeChange = true;
4530 }
4531 return MadeChange;
4532}
4533
4534
4535// This pass converts a legalized DAG into a RISCV-specific DAG, ready
4536// for instruction scheduling.
4537FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4538 CodeGenOptLevel OptLevel) {
4539 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4540}
4541
4542char RISCVDAGToDAGISelLegacy::ID = 0;
4543
4544RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4545 CodeGenOptLevel OptLevel)
4546 : SelectionDAGISelLegacy(
4547 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4548INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:58
Register const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool getVal(MDTuple *MD, const char *Key, uint64_t &Val)
static bool usesAllOnesMask(SDValue MaskOp)
static Register getTileReg(uint64_t TileNum)
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, int64_t Imm, const RISCVSubtarget &Subtarget)
static bool isRegRegScaleLoadOrStore(SDNode *User, SDValue Add, const RISCVSubtarget &Subtarget)
Return true if this a load/store that we have a RegRegScale instruction for.
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)
static bool isWorthFoldingAdd(SDValue Add)
static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, RISCVMatInt::InstSeq &Seq)
static bool isImplicitDef(SDValue V)
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, std::function< bool(int64_t)> ValidateImm, bool Decrement=false)
static unsigned getSegInstNF(unsigned Intrinsic)
static bool isWorthFoldingIntoRegRegScale(const RISCVSubtarget &Subtarget, SDValue Add, SDValue Shift=SDValue())
Is it profitable to fold this Add into RegRegScale load/store.
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, unsigned Bits, const TargetInstrInfo *TII)
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, const RISCVSubtarget *Subtarget, SDValue Addr, SDValue &Base, SDValue &Offset, bool IsPrefetch=false)
#define INST_ALL_NF_CASE_WITH_FF(NAME)
#define CASE_VMSLT_OPCODES(lmulenum, suffix)
bool isRegImmLoadOrStore(SDNode *User, SDValue Add)
static cl::opt< bool > UsePseudoMovImm("riscv-use-rematerializable-movimm", cl::Hidden, cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " "constant materialization"), cl::init(false))
static SDValue findVSplat(SDValue N)
#define INST_ALL_NF_CASE(NAME)
Contains matchers for matching SelectionDAG nodes and values.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
#define PASS_NAME
DEMANGLE_DUMP_METHOD void dump() const
bool isZero() const
Definition APFloat.h:1445
APInt bitcastToAPInt() const
Definition APFloat.h:1353
bool isPosZero() const
Definition APFloat.h:1460
bool isNegZero() const
Definition APFloat.h:1461
Class for arbitrary precision integers.
Definition APInt.h:78
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:219
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
const APFloat & getValueAPF() const
uint64_t getZExtValue() const
int64_t getSExtValue() const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This class is used to form a handle around another node that is persistent and is updated across invo...
const SDValue & getValue() const
static StringRef getMemConstraintName(ConstraintCode C)
Definition InlineAsm.h:470
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Describe properties that are true of each instruction in the target description file.
Machine Value Type.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFixedLengthVector() const
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
void setFlags(Flags f)
Bitwise OR the current flags with the given flags.
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TargetMachine, CodeGenOptLevel OptLevel)
bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val)
RISC-V doesn't have general instructions for integer setne/seteq, but we can check for equality with ...
bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectNegImm(SDValue N, SDValue &Val)
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val)
bool selectSHXADD_UWOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD_UW.
bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment)
bool hasAllNBitUsers(SDNode *Node, unsigned Bits, const unsigned Depth=0) const
bool SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the least significant 5 bits of Offset should be all zeros.
bool selectZExtImm32(SDValue N, SDValue &Val)
bool SelectAddrRegZextRegScale(SDValue Addr, unsigned MaxShiftAmount, unsigned Bits, SDValue &Base, SDValue &Index, SDValue &Scale)
bool SelectAddrRegReg(SDValue Addr, SDValue &Base, SDValue &Offset)
void selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
void selectVLSEGFF(SDNode *Node, unsigned NF, bool IsMasked)
bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal)
bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2)
void selectSF_VC_X_SE(SDNode *Node)
bool orDisjoint(const SDNode *Node) const
bool selectLow8BitsVSplat(SDValue N, SDValue &SplatVal)
bool hasAllHUsers(SDNode *Node) const
bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal)
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm)
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset)
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
bool hasAllWUsers(SDNode *Node) const
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool selectInvLogicImm(SDValue N, SDValue &Val)
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset)
void Select(SDNode *Node) override
Main hook for targets to transform nodes into machine nodes.
void selectXSfmmVSET(SDNode *Node)
bool trySignedBitfieldInsertInSign(SDNode *Node)
bool selectVSplat(SDValue N, SDValue &SplatVal)
void addVectorLoadStoreOperands(SDNode *Node, unsigned SEWImm, const SDLoc &DL, unsigned CurOp, bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl< SDValue > &Operands, bool IsLoad=false, MVT *IndexVT=nullptr)
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset)
Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
bool selectScalarFPAsInt(SDValue N, SDValue &Imm)
bool hasAllBUsers(SDNode *Node) const
void selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool tryShrinkShlLogicImm(SDNode *Node)
void selectVSETVLI(SDNode *Node)
bool selectVLOp(SDValue N, SDValue &VL)
bool trySignedBitfieldExtract(SDNode *Node)
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal)
void selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsStrided)
bool selectVSplatImm64Neg(SDValue N, SDValue &SplatVal)
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal)
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb)
void selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, bool IsOrdered)
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt)
bool selectSHXADDOp(SDValue N, unsigned ShAmt, SDValue &Val)
Look for various patterns that can be done with a SHL that can be folded into a SHXADD.
bool tryIndexedLoad(SDNode *Node)
bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount, SDValue &Base, SDValue &Index, SDValue &Scale)
bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal)
bool hasShlAdd(int64_t ShAmt) const
static std::pair< unsigned, unsigned > decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx, const RISCVRegisterInfo *TRI)
static unsigned getRegClassIDForVecVT(MVT VT)
static RISCVVType::VLMUL getLMUL(MVT VT)
Wrapper class representing virtual and physical registers.
Definition Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const
IsProfitableToFold - Returns true if it's profitable to fold the specific operand node N of U during ...
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, CodeGenOptLevel OptLevel, bool IgnoreChains=false)
IsLegalToFold - Returns true if the specific operand node N of U can be folded during instruction sel...
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:347
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
iterator_range< user_iterator > users()
Definition Value.h:426
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
static bool hasVLOp(uint64_t TSFlags)
static bool hasVecPolicyOp(uint64_t TSFlags)
static bool hasSEWOp(uint64_t TSFlags)
static bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc)
InstSeq generateInstSeq(int64_t Val, const MCSubtargetInfo &STI)
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
InstSeq generateTwoRegInstSeq(int64_t Val, const MCSubtargetInfo &STI, unsigned &ShiftAmt, unsigned &AddOpc)
SmallVector< Inst, 8 > InstSeq
Definition RISCVMatInt.h:43
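The RISCVMatInt helpers above answer how an arbitrary constant would be materialized into a GPR. A hedged sketch: it assumes a valid MCSubtargetInfo is supplied by the caller and that the target-local header MCTargetDesc/RISCVMatInt.h is on the include path; materializationLength is an illustrative name.

#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/MC/MCSubtargetInfo.h"

// How many instructions (LUI/ADDI/SLLI/...) would it take to build Val?
static unsigned materializationLength(int64_t Val,
                                      const llvm::MCSubtargetInfo &STI) {
  llvm::RISCVMatInt::InstSeq Seq = llvm::RISCVMatInt::generateInstSeq(Val, STI);
  return Seq.size();
}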
static unsigned decodeVSEW(unsigned VSEW)
LLVM_ABI unsigned encodeXSfmmVType(unsigned SEW, unsigned Widen, bool AltFmt)
LLVM_ABI std::pair< unsigned, bool > decodeVLMUL(VLMUL VLMul)
LLVM_ABI unsigned getSEWLMULRatio(unsigned SEW, VLMUL VLMul)
static unsigned decodeTWiden(unsigned TWiden)
LLVM_ABI unsigned encodeVTYPE(VLMUL VLMUL, unsigned SEW, bool TailAgnostic, bool MaskAgnostic, bool AltFmt=false)
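The RISCVVType helpers above encode and decode the fields of the vtype CSR. A hedged sketch using getSEWLMULRatio; the header location (llvm/TargetParser/RISCVTargetParser.h) and the VLMUL spelling are assumptions, since these declarations have moved between releases:

#include "llvm/TargetParser/RISCVTargetParser.h"

// Two (SEW, LMUL) pairs with equal SEW/LMUL ratio give the same VLMAX,
// so switching between them does not change the number of active elements.
static bool preservesVLMax(unsigned SEWA, llvm::RISCVVType::VLMUL LMulA,
                           unsigned SEWB, llvm::RISCVVType::VLMUL LMulB) {
  return llvm::RISCVVType::getSEWLMULRatio(SEWA, LMulA) ==
         llvm::RISCVVType::getSEWLMULRatio(SEWB, LMulB);
}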
unsigned getRVVMCOpcode(unsigned RVVPseudoOpcode)
std::optional< unsigned > getVectorLowDemandedScalarBits(unsigned Opcode, unsigned Log2SEW)
static constexpr unsigned RVVBitsPerBlock
static constexpr int64_t VLMaxSentinel
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
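llvm::all_of above is the range form of std::all_of. A minimal sketch, assuming only the ADT headers; allNonNegative is an illustrative name:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

// True when every element of Vals is >= 0; no begin()/end() boilerplate.
static bool allNonNegative(const llvm::SmallVectorImpl<int> &Vals) {
  return llvm::all_of(Vals, [](int V) { return V >= 0; });
}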
static const MachineMemOperand::Flags MONontemporalBit1
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
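isInt<N> above is the standard check for signed immediate fields, for example the 12-bit immediates of ADDI-style encodings. A minimal sketch; fitsInSImm12 is an illustrative name:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// True for -2048..2047, i.e. values encodable in a signed 12-bit field.
static bool fitsInSImm12(int64_t Imm) {
  return llvm::isInt<12>(Imm);
}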
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
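dyn_cast above returns a null pointer when the cast fails, so the result must be checked before use; cast (further down) asserts success, and isa only answers the type question. A hedged sketch in the SelectionDAG setting this file works in; isSmallConstant is an illustrative name:

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/MathExtras.h"

// True when V is a constant node whose value fits a signed 12-bit immediate.
static bool isSmallConstant(llvm::SDValue V) {
  if (auto *C = llvm::dyn_cast<llvm::ConstantSDNode>(V))
    return llvm::isInt<12>(C->getSExtValue());
  return false;
}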
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:279
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:289
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:252
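isUIntN above takes the field width as a runtime value, unlike the template form isUInt<N> listed later. A minimal sketch; fitsInUnsignedField is an illustrative name:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// e.g. fitsInUnsignedField(31, 5) is true, fitsInUnsignedField(32, 5) is not.
static bool fitsInUnsignedField(uint64_t Imm, unsigned FieldBits) {
  return llvm::isUIntN(FieldBits, Imm);
}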
static const MachineMemOperand::Flags MONontemporalBit0
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:186
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version.)
Definition MathExtras.h:282
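The bit-pattern helpers above (countr_zero, countr_one, isShiftedMask_64) compose naturally: a shifted mask 0...01...10...0 is fully described by its shift amount and run length. A minimal sketch; decomposeShiftedMask is an illustrative name:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

// On success, V == maskTrailingOnes<uint64_t>(Width) << Shift.
static bool decomposeShiftedMask(uint64_t V, unsigned &Shift, unsigned &Width) {
  if (!llvm::isShiftedMask_64(V))
    return false;
  Shift = llvm::countr_zero(V);          // zeros below the run of ones
  Width = llvm::countr_one(V >> Shift);  // length of the run of ones
  return true;
}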
unsigned M1(unsigned Val)
Definition VE.h:377
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:222
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:288
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (64 bit version.)
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
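isMask_64 above accepts exactly the values 1, 3, 7, ..., i.e. runs of ones starting at bit 0, while isUInt<N> bounds the magnitude. A minimal sketch combining the two; isLowByteMask is an illustrative name:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// True for 0x1, 0x3, ..., 0xFF; false for 0, 0xFF00 (shifted), or 0x1FF (too wide).
static bool isLowByteMask(uint64_t V) {
  return llvm::isMask_64(V) && llvm::isUInt<8>(V);
}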
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:548
constexpr T maskTrailingZeros(unsigned N)
Create a bitmask with the N right-most bits set to 0, and all other bits set to 1.
Definition MathExtras.h:103
@ Add
Sum of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
FunctionPass * createRISCVISelDag(RISCVTargetMachine &TM, CodeGenOptLevel OptLevel)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
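commonAlignment above gives the alignment that is still guaranteed after stepping Offset bytes from a base with alignment A. A minimal sketch; alignAfterOffset is an illustrative name:

#include "llvm/Support/Alignment.h"
#include <cstdint>

// e.g. alignAfterOffset(llvm::Align(8), 4) == llvm::Align(4).
static llvm::Align alignAfterOffset(llvm::Align Base, uint64_t Offset) {
  return llvm::commonAlignment(Base, Offset);
}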
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:583
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:86
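SignExtend64 and maskTrailingOnes above combine into the usual field-extraction idiom (maskTrailingZeros is the complementary mask). A minimal sketch using the runtime-width overload SignExtend64(x, B); extractSignedField is an illustrative name:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Pull Bits bits out of V starting at bit Lo and sign-extend the result;
// e.g. extractSignedField(0xFFF, 0, 12) == -1.
static int64_t extractSignedField(uint64_t V, unsigned Lo, unsigned Bits) {
  uint64_t Field = (V >> Lo) & llvm::maskTrailingOnes<uint64_t>(Bits);
  return llvm::SignExtend64(Field, Bits);
}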
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR-level reference.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.