1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
22#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
30#include "llvm/Support/Debug.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
44
45LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
75 MVT::i1, Promote);
76
83
86 GRLenVT, Custom);
87
89
94
97
101
102 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
103 // we get to know which of sll and revb.2h is faster.
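  // A rough sketch (not from the original source): on LA64 an i16 bitreverse
  // is therefore expected to lower along the lines of
  //   bitrev.d $t0, $a0      # reverse all GRLen bits
  //   srli.d   $a0, $t0, 48  # keep the reversed 16 bits in the low half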
106
107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109 // and i32 could still be byte-swapped relatively cheaply.
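  // Illustrative sketch (not from the original source): an i32 byte swap on
  // LA32 can then be formed as
  //   revb.2h $t0, $a0      # swap bytes within each 16-bit half
  //   rotri.w $a0, $t0, 16  # swap the two halves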
111
117
120
121 // Set operations for LA64 only.
122
123 if (Subtarget.is64Bit()) {
139
142 }
143
144 // Set operations for LA32 only.
145
146 if (!Subtarget.is64Bit()) {
152
153 // Set libcalls.
154 setLibcallName(RTLIB::MUL_I128, nullptr);
155 // The MULO libcall is not part of libgcc, only compiler-rt.
156 setLibcallName(RTLIB::MULO_I64, nullptr);
157 }
158
159 // The MULO libcall is not part of libgcc, only compiler-rt.
160 setLibcallName(RTLIB::MULO_I128, nullptr);
161
163
164 static const ISD::CondCode FPCCToExpand[] = {
167
168 // Set operations for 'F' feature.
169
170 if (Subtarget.hasBasicF()) {
171 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
172
186
187 if (Subtarget.is64Bit())
189
190 if (!Subtarget.hasBasicD()) {
192 if (Subtarget.is64Bit()) {
195 }
196 }
197 }
198
199 // Set operations for 'D' feature.
200
201 if (Subtarget.hasBasicD()) {
202 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
203 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
204 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
205
219
220 if (Subtarget.is64Bit())
222 }
223
224 // Set operations for 'LSX' feature.
225
226 if (Subtarget.hasExtLSX()) {
228 // Expand all truncating stores and extending loads.
229 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
230 setTruncStoreAction(VT, InnerVT, Expand);
233 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
234 }
235 // By default everything must be expanded. Then we will selectively turn
236 // on ones that can be effectively codegen'd.
237 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
239 }
240
241 for (MVT VT : LSXVTs) {
245
249
252 }
253 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
257 Legal);
259 VT, Legal);
266 Expand);
267 }
268 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
271 }
272 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
280 VT, Expand);
281 }
282 }
283
284 // Set operations for 'LASX' feature.
285
286 if (Subtarget.hasExtLASX()) {
287 for (MVT VT : LASXVTs) {
291
295
298 }
299 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
303 Legal);
305 VT, Legal);
312 Expand);
313 }
314 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
317 }
318 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
326 VT, Expand);
327 }
328 }
329
330 // Set DAG combine for LA32 and LA64.
331
335
336 // Set DAG combine for 'LSX' feature.
337
338 if (Subtarget.hasExtLSX())
340
341 // Compute derived properties from the register classes.
343
345
348
350
352
353 // Function alignments.
355 // Set preferred alignments.
359}
360
361bool LoongArchTargetLowering::isOffsetFoldingLegal(
362 const GlobalAddressSDNode *GA) const {
363 // In order to maximise the opportunity for common subexpression elimination,
364 // keep a separate ADD node for the global address offset instead of folding
365 // it in the global address node. Later peephole optimisations may choose to
366 // fold it back in when profitable.
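  // Illustrative example (not from the original source): for two accesses to
  // @g + 8 and @g + 12, keeping (add (globaladdr @g), 8) and
  // (add (globaladdr @g), 12) lets CSE share a single materialisation of
  // @g's address instead of materialising two folded addresses.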
367 return false;
368}
369
370SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
371 SelectionDAG &DAG) const {
372 switch (Op.getOpcode()) {
373 case ISD::ATOMIC_FENCE:
374 return lowerATOMIC_FENCE(Op, DAG);
375 case ISD::EH_DWARF_CFA:
376 return lowerEH_DWARF_CFA(Op, DAG);
377 case ISD::GlobalAddress:
378 return lowerGlobalAddress(Op, DAG);
379 case ISD::GlobalTLSAddress:
380 return lowerGlobalTLSAddress(Op, DAG);
381 case ISD::INTRINSIC_WO_CHAIN:
382 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
383 case ISD::INTRINSIC_W_CHAIN:
384 return lowerINTRINSIC_W_CHAIN(Op, DAG);
385 case ISD::INTRINSIC_VOID:
386 return lowerINTRINSIC_VOID(Op, DAG);
387 case ISD::BlockAddress:
388 return lowerBlockAddress(Op, DAG);
389 case ISD::JumpTable:
390 return lowerJumpTable(Op, DAG);
391 case ISD::SHL_PARTS:
392 return lowerShiftLeftParts(Op, DAG);
393 case ISD::SRA_PARTS:
394 return lowerShiftRightParts(Op, DAG, true);
395 case ISD::SRL_PARTS:
396 return lowerShiftRightParts(Op, DAG, false);
397 case ISD::ConstantPool:
398 return lowerConstantPool(Op, DAG);
399 case ISD::FP_TO_SINT:
400 return lowerFP_TO_SINT(Op, DAG);
401 case ISD::BITCAST:
402 return lowerBITCAST(Op, DAG);
403 case ISD::UINT_TO_FP:
404 return lowerUINT_TO_FP(Op, DAG);
405 case ISD::SINT_TO_FP:
406 return lowerSINT_TO_FP(Op, DAG);
407 case ISD::VASTART:
408 return lowerVASTART(Op, DAG);
409 case ISD::FRAMEADDR:
410 return lowerFRAMEADDR(Op, DAG);
411 case ISD::RETURNADDR:
412 return lowerRETURNADDR(Op, DAG);
413 case ISD::WRITE_REGISTER:
414 return lowerWRITE_REGISTER(Op, DAG);
415 case ISD::INSERT_VECTOR_ELT:
416 return lowerINSERT_VECTOR_ELT(Op, DAG);
417 case ISD::EXTRACT_VECTOR_ELT:
418 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
419 case ISD::BUILD_VECTOR:
420 return lowerBUILD_VECTOR(Op, DAG);
421 case ISD::VECTOR_SHUFFLE:
422 return lowerVECTOR_SHUFFLE(Op, DAG);
423 }
424 return SDValue();
425}
426
427SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
428 SelectionDAG &DAG) const {
429 // TODO: custom shuffle.
430 return SDValue();
431}
432
433static bool isConstantOrUndef(const SDValue Op) {
434 if (Op->isUndef())
435 return true;
436 if (isa<ConstantSDNode>(Op))
437 return true;
438 if (isa<ConstantFPSDNode>(Op))
439 return true;
440 return false;
441}
442
444 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
445 if (isConstantOrUndef(Op->getOperand(i)))
446 return true;
447 return false;
448}
449
450SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
451 SelectionDAG &DAG) const {
452 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
453 EVT ResTy = Op->getValueType(0);
454 SDLoc DL(Op);
455 APInt SplatValue, SplatUndef;
456 unsigned SplatBitSize;
457 bool HasAnyUndefs;
458 bool Is128Vec = ResTy.is128BitVector();
459 bool Is256Vec = ResTy.is256BitVector();
460
461 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
462 (!Subtarget.hasExtLASX() || !Is256Vec))
463 return SDValue();
464
465 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
466 /*MinSplatBits=*/8) &&
467 SplatBitSize <= 64) {
468 // We can only cope with 8, 16, 32, or 64-bit elements.
469 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
470 SplatBitSize != 64)
471 return SDValue();
472
473 EVT ViaVecTy;
474
475 switch (SplatBitSize) {
476 default:
477 return SDValue();
478 case 8:
479 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
480 break;
481 case 16:
482 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
483 break;
484 case 32:
485 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
486 break;
487 case 64:
488 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
489 break;
490 }
491
492 // SelectionDAG::getConstant will promote SplatValue appropriately.
493 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
494
495 // Bitcast to the type we originally wanted.
496 if (ViaVecTy != ResTy)
497 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
498
499 return Result;
500 }
501
502 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
503 return Op;
504
506 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
507 // The resulting code is the same length as the expansion, but it doesn't
508 // use memory operations.
509 EVT ResTy = Node->getValueType(0);
510
511 assert(ResTy.isVector());
512
513 unsigned NumElts = ResTy.getVectorNumElements();
514 SDValue Vector = DAG.getUNDEF(ResTy);
515 for (unsigned i = 0; i < NumElts; ++i) {
516 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
517 Node->getOperand(i),
518 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
519 }
520 return Vector;
521 }
522
523 return SDValue();
524}
525
526SDValue
527LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
528 SelectionDAG &DAG) const {
529 EVT VecTy = Op->getOperand(0)->getValueType(0);
530 SDValue Idx = Op->getOperand(1);
531 EVT EltTy = VecTy.getVectorElementType();
532 unsigned NumElts = VecTy.getVectorNumElements();
533
534 if (isa<ConstantSDNode>(Idx) &&
535 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
536 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
537 return Op;
538
539 return SDValue();
540}
541
542SDValue
543LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
544 SelectionDAG &DAG) const {
545 if (isa<ConstantSDNode>(Op->getOperand(2)))
546 return Op;
547 return SDValue();
548}
549
550SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
551 SelectionDAG &DAG) const {
552 SDLoc DL(Op);
553 SyncScope::ID FenceSSID =
554 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
555
556 // singlethread fences only synchronize with signal handlers on the same
557 // thread and thus only need to preserve instruction order, not actually
558 // enforce memory ordering.
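  // For example (illustrative, not from the original source), the IR fence
  //   fence syncscope("singlethread") seq_cst
  // is lowered to MEMBARRIER and therefore emits no machine instruction,
  // while still stopping the compiler from reordering memory accesses
  // across it.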
559 if (FenceSSID == SyncScope::SingleThread)
560 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
561 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
562
563 return Op;
564}
565
566SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
567 SelectionDAG &DAG) const {
568
569 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
570 DAG.getContext()->emitError(
571 "On LA64, only 64-bit registers can be written.");
572 return Op.getOperand(0);
573 }
574
575 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
576 DAG.getContext()->emitError(
577 "On LA32, only 32-bit registers can be written.");
578 return Op.getOperand(0);
579 }
580
581 return Op;
582}
583
584SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
585 SelectionDAG &DAG) const {
586 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
587 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
588 "be a constant integer");
589 return SDValue();
590 }
591
592 MachineFunction &MF = DAG.getMachineFunction();
593 MF.getFrameInfo().setFrameAddressIsTaken(true);
594 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
595 EVT VT = Op.getValueType();
596 SDLoc DL(Op);
597 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
598 unsigned Depth = Op.getConstantOperandVal(0);
599 int GRLenInBytes = Subtarget.getGRLen() / 8;
600
601 while (Depth--) {
602 int Offset = -(GRLenInBytes * 2);
603 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
605 FrameAddr =
606 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
607 }
608 return FrameAddr;
609}
610
611SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
612 SelectionDAG &DAG) const {
613 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
614 return SDValue();
615
616 // Currently we only support lowering the return address for the current frame.
617 if (Op.getConstantOperandVal(0) != 0) {
618 DAG.getContext()->emitError(
619 "return address can only be determined for the current frame");
620 return SDValue();
621 }
622
623 MachineFunction &MF = DAG.getMachineFunction();
624 MF.getFrameInfo().setReturnAddressIsTaken(true);
625 MVT GRLenVT = Subtarget.getGRLenVT();
626
627 // Return the value of the return address register, marking it an implicit
628 // live-in.
629 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
630 getRegClassFor(GRLenVT));
631 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
632}
633
634SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
635 SelectionDAG &DAG) const {
636 MachineFunction &MF = DAG.getMachineFunction();
637 auto Size = Subtarget.getGRLen() / 8;
638 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
639 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
640}
641
642SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
643 SelectionDAG &DAG) const {
644 MachineFunction &MF = DAG.getMachineFunction();
645 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
646
647 SDLoc DL(Op);
648 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
649 getPointerTy(MF.getDataLayout()));
650
651 // vastart just stores the address of the VarArgsFrameIndex slot into the
652 // memory location argument.
653 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
654 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
655 MachinePointerInfo(SV));
656}
657
658SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
659 SelectionDAG &DAG) const {
660 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
661 !Subtarget.hasBasicD() && "unexpected target features");
662
663 SDLoc DL(Op);
664 SDValue Op0 = Op.getOperand(0);
665 if (Op0->getOpcode() == ISD::AND) {
666 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
667 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
668 return Op;
669 }
670
671 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
672 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
673 Op0.getConstantOperandVal(2) == UINT64_C(0))
674 return Op;
675
676 if (Op0.getOpcode() == ISD::AssertZext &&
677 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
678 return Op;
679
680 EVT OpVT = Op0.getValueType();
681 EVT RetVT = Op.getValueType();
682 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
683 MakeLibCallOptions CallOptions;
684 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
685 SDValue Chain = SDValue();
686 SDValue Result;
687 std::tie(Result, Chain) =
688 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
689 return Result;
690}
691
692SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
693 SelectionDAG &DAG) const {
694 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
695 !Subtarget.hasBasicD() && "unexpected target features");
696
697 SDLoc DL(Op);
698 SDValue Op0 = Op.getOperand(0);
699
700 if ((Op0.getOpcode() == ISD::AssertSext ||
702 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
703 return Op;
704
705 EVT OpVT = Op0.getValueType();
706 EVT RetVT = Op.getValueType();
707 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
708 MakeLibCallOptions CallOptions;
709 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
710 SDValue Chain = SDValue();
711 SDValue Result;
712 std::tie(Result, Chain) =
713 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
714 return Result;
715}
716
717SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
718 SelectionDAG &DAG) const {
719
720 SDLoc DL(Op);
721 SDValue Op0 = Op.getOperand(0);
722
723 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
724 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
725 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
726 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
727 }
728 return Op;
729}
730
731SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
732 SelectionDAG &DAG) const {
733
734 SDLoc DL(Op);
735
736 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
737 !Subtarget.hasBasicD()) {
738 SDValue Dst =
739 DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
740 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
741 }
742
743 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
744 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
745 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
746}
747
749 SelectionDAG &DAG, unsigned Flags) {
750 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
751}
752
754 SelectionDAG &DAG, unsigned Flags) {
755 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
756 Flags);
757}
758
760 SelectionDAG &DAG, unsigned Flags) {
761 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
762 N->getOffset(), Flags);
763}
764
766 SelectionDAG &DAG, unsigned Flags) {
767 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
768}
769
770template <class NodeTy>
771SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
773 bool IsLocal) const {
774 SDLoc DL(N);
775 EVT Ty = getPointerTy(DAG.getDataLayout());
776 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
777
778 switch (M) {
779 default:
780 report_fatal_error("Unsupported code model");
781
782 case CodeModel::Large: {
783 assert(Subtarget.is64Bit() && "Large code model requires LA64");
784
785 // This is not actually used, but is necessary for successfully matching
786 // the PseudoLA_*_LARGE nodes.
787 SDValue Tmp = DAG.getConstant(0, DL, Ty);
788 if (IsLocal)
789 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
790 // eventually becomes the desired 5-insn code sequence.
791 return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
792 Tmp, Addr),
793 0);
794
795 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
796 // becomes the desired 5-insn code sequence.
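    // Rough illustration only (not from the original source); the expanded
    // 5-insn sequence is expected to look like:
    //   pcalau12i $dst, %got_pc_hi20(sym)
    //   addi.d    $tmp, $zero, %got_pc_lo12(sym)
    //   lu32i.d   $tmp, %got64_pc_lo20(sym)
    //   lu52i.d   $tmp, %got64_pc_hi12(sym)
    //   ldx.d     $dst, $dst, $tmp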
797 return SDValue(
798 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
799 0);
800 }
801
802 case CodeModel::Small:
804 if (IsLocal)
805 // This generates the pattern (PseudoLA_PCREL sym), which expands to
806 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
807 return SDValue(
808 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
809
810 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
811 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
812 return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
813 0);
814 }
815}
816
817SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
818 SelectionDAG &DAG) const {
819 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
820 DAG.getTarget().getCodeModel());
821}
822
823SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
824 SelectionDAG &DAG) const {
825 return getAddr(cast<JumpTableSDNode>(Op), DAG,
826 DAG.getTarget().getCodeModel());
827}
828
829SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
830 SelectionDAG &DAG) const {
831 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
832 DAG.getTarget().getCodeModel());
833}
834
835SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
836 SelectionDAG &DAG) const {
837 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
838 assert(N->getOffset() == 0 && "unexpected offset in global node");
839 auto CM = DAG.getTarget().getCodeModel();
840 const GlobalValue *GV = N->getGlobal();
841
842 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
843 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
844 CM = *GCM;
845 }
846
847 return getAddr(N, DAG, CM, GV->isDSOLocal());
848}
849
850SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
851 SelectionDAG &DAG,
852 unsigned Opc,
853 bool Large) const {
854 SDLoc DL(N);
855 EVT Ty = getPointerTy(DAG.getDataLayout());
856 MVT GRLenVT = Subtarget.getGRLenVT();
857
858 // This is not actually used, but is necessary for successfully matching the
859 // PseudoLA_*_LARGE nodes.
860 SDValue Tmp = DAG.getConstant(0, DL, Ty);
861 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
862 SDValue Offset = Large
863 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
864 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
865
866 // Add the thread pointer.
867 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
868 DAG.getRegister(LoongArch::R2, GRLenVT));
869}
870
871SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
872 SelectionDAG &DAG,
873 unsigned Opc,
874 bool Large) const {
875 SDLoc DL(N);
876 EVT Ty = getPointerTy(DAG.getDataLayout());
877 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
878
879 // This is not actually used, but is necessary for successfully matching the
880 // PseudoLA_*_LARGE nodes.
881 SDValue Tmp = DAG.getConstant(0, DL, Ty);
882
883 // Use a PC-relative addressing mode to access the dynamic GOT address.
884 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
885 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
886 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
887
888 // Prepare argument list to generate call.
889 ArgListTy Args;
890 ArgListEntry Entry;
891 Entry.Node = Load;
892 Entry.Ty = CallTy;
893 Args.push_back(Entry);
894
895 // Setup call to __tls_get_addr.
896 TargetLowering::CallLoweringInfo CLI(DAG);
897 CLI.setDebugLoc(DL)
898 .setChain(DAG.getEntryNode())
899 .setLibCallee(CallingConv::C, CallTy,
900 DAG.getExternalSymbol("__tls_get_addr", Ty),
901 std::move(Args));
902
903 return LowerCallTo(CLI).first;
904}
905
906SDValue
907LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
908 SelectionDAG &DAG) const {
911 report_fatal_error("In GHC calling convention TLS is not supported");
912
914 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
915
916 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
917 assert(N->getOffset() == 0 && "unexpected offset in global node");
918
920 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
922 // In this model, application code calls the dynamic linker function
923 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
924 // runtime.
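    // Illustrative sketch (not from the original source): a general-dynamic
    // access is materialised roughly as
    //   la.tls.gd $a0, sym
    //   bl        %plt(__tls_get_addr)
    // with the returned $a0 holding the address of the TLS object.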
925 Addr = getDynamicTLSAddr(N, DAG,
926 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
927 : LoongArch::PseudoLA_TLS_GD,
928 Large);
929 break;
931 // Same as GeneralDynamic, except for assembly modifiers and relocation
932 // records.
933 Addr = getDynamicTLSAddr(N, DAG,
934 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
935 : LoongArch::PseudoLA_TLS_LD,
936 Large);
937 break;
939 // This model uses the GOT to resolve TLS offsets.
940 Addr = getStaticTLSAddr(N, DAG,
941 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
942 : LoongArch::PseudoLA_TLS_IE,
943 Large);
944 break;
946 // This model is used when static linking as the TLS offsets are resolved
947 // during program linking.
948 //
949 // This node doesn't need an extra argument for the large code model.
950 Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
951 break;
952 }
953
954 return Addr;
955}
956
957template <unsigned N>
958static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
959 SelectionDAG &DAG, bool IsSigned = false) {
960 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
961 // Check the ImmArg.
962 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
963 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
964 DAG.getContext()->emitError(Op->getOperationName(0) +
965 ": argument out of range.");
966 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
967 }
968 return SDValue();
969}
970
971SDValue
972LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
973 SelectionDAG &DAG) const {
974 SDLoc DL(Op);
975 switch (Op.getConstantOperandVal(0)) {
976 default:
977 return SDValue(); // Don't custom lower most intrinsics.
978 case Intrinsic::thread_pointer: {
979 EVT PtrVT = getPointerTy(DAG.getDataLayout());
980 return DAG.getRegister(LoongArch::R2, PtrVT);
981 }
982 case Intrinsic::loongarch_lsx_vpickve2gr_d:
983 case Intrinsic::loongarch_lsx_vpickve2gr_du:
984 case Intrinsic::loongarch_lsx_vreplvei_d:
985 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
986 return checkIntrinsicImmArg<1>(Op, 2, DAG);
987 case Intrinsic::loongarch_lsx_vreplvei_w:
988 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
989 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
990 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
991 case Intrinsic::loongarch_lasx_xvpickve_d:
992 case Intrinsic::loongarch_lasx_xvpickve_d_f:
993 return checkIntrinsicImmArg<2>(Op, 2, DAG);
994 case Intrinsic::loongarch_lasx_xvinsve0_d:
995 return checkIntrinsicImmArg<2>(Op, 3, DAG);
996 case Intrinsic::loongarch_lsx_vsat_b:
997 case Intrinsic::loongarch_lsx_vsat_bu:
998 case Intrinsic::loongarch_lsx_vrotri_b:
999 case Intrinsic::loongarch_lsx_vsllwil_h_b:
1000 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
1001 case Intrinsic::loongarch_lsx_vsrlri_b:
1002 case Intrinsic::loongarch_lsx_vsrari_b:
1003 case Intrinsic::loongarch_lsx_vreplvei_h:
1004 case Intrinsic::loongarch_lasx_xvsat_b:
1005 case Intrinsic::loongarch_lasx_xvsat_bu:
1006 case Intrinsic::loongarch_lasx_xvrotri_b:
1007 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
1008 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
1009 case Intrinsic::loongarch_lasx_xvsrlri_b:
1010 case Intrinsic::loongarch_lasx_xvsrari_b:
1011 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
1012 case Intrinsic::loongarch_lasx_xvpickve_w:
1013 case Intrinsic::loongarch_lasx_xvpickve_w_f:
1014 return checkIntrinsicImmArg<3>(Op, 2, DAG);
1015 case Intrinsic::loongarch_lasx_xvinsve0_w:
1016 return checkIntrinsicImmArg<3>(Op, 3, DAG);
1017 case Intrinsic::loongarch_lsx_vsat_h:
1018 case Intrinsic::loongarch_lsx_vsat_hu:
1019 case Intrinsic::loongarch_lsx_vrotri_h:
1020 case Intrinsic::loongarch_lsx_vsllwil_w_h:
1021 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
1022 case Intrinsic::loongarch_lsx_vsrlri_h:
1023 case Intrinsic::loongarch_lsx_vsrari_h:
1024 case Intrinsic::loongarch_lsx_vreplvei_b:
1025 case Intrinsic::loongarch_lasx_xvsat_h:
1026 case Intrinsic::loongarch_lasx_xvsat_hu:
1027 case Intrinsic::loongarch_lasx_xvrotri_h:
1028 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
1029 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
1030 case Intrinsic::loongarch_lasx_xvsrlri_h:
1031 case Intrinsic::loongarch_lasx_xvsrari_h:
1032 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
1033 return checkIntrinsicImmArg<4>(Op, 2, DAG);
1034 case Intrinsic::loongarch_lsx_vsrlni_b_h:
1035 case Intrinsic::loongarch_lsx_vsrani_b_h:
1036 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
1037 case Intrinsic::loongarch_lsx_vsrarni_b_h:
1038 case Intrinsic::loongarch_lsx_vssrlni_b_h:
1039 case Intrinsic::loongarch_lsx_vssrani_b_h:
1040 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
1041 case Intrinsic::loongarch_lsx_vssrani_bu_h:
1042 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
1043 case Intrinsic::loongarch_lsx_vssrarni_b_h:
1044 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
1045 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
1046 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
1047 case Intrinsic::loongarch_lasx_xvsrani_b_h:
1048 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
1049 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
1050 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
1051 case Intrinsic::loongarch_lasx_xvssrani_b_h:
1052 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
1053 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
1054 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
1055 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
1056 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
1057 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
1058 return checkIntrinsicImmArg<4>(Op, 3, DAG);
1059 case Intrinsic::loongarch_lsx_vsat_w:
1060 case Intrinsic::loongarch_lsx_vsat_wu:
1061 case Intrinsic::loongarch_lsx_vrotri_w:
1062 case Intrinsic::loongarch_lsx_vsllwil_d_w:
1063 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
1064 case Intrinsic::loongarch_lsx_vsrlri_w:
1065 case Intrinsic::loongarch_lsx_vsrari_w:
1066 case Intrinsic::loongarch_lsx_vslei_bu:
1067 case Intrinsic::loongarch_lsx_vslei_hu:
1068 case Intrinsic::loongarch_lsx_vslei_wu:
1069 case Intrinsic::loongarch_lsx_vslei_du:
1070 case Intrinsic::loongarch_lsx_vslti_bu:
1071 case Intrinsic::loongarch_lsx_vslti_hu:
1072 case Intrinsic::loongarch_lsx_vslti_wu:
1073 case Intrinsic::loongarch_lsx_vslti_du:
1074 case Intrinsic::loongarch_lsx_vbsll_v:
1075 case Intrinsic::loongarch_lsx_vbsrl_v:
1076 case Intrinsic::loongarch_lasx_xvsat_w:
1077 case Intrinsic::loongarch_lasx_xvsat_wu:
1078 case Intrinsic::loongarch_lasx_xvrotri_w:
1079 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
1080 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
1081 case Intrinsic::loongarch_lasx_xvsrlri_w:
1082 case Intrinsic::loongarch_lasx_xvsrari_w:
1083 case Intrinsic::loongarch_lasx_xvslei_bu:
1084 case Intrinsic::loongarch_lasx_xvslei_hu:
1085 case Intrinsic::loongarch_lasx_xvslei_wu:
1086 case Intrinsic::loongarch_lasx_xvslei_du:
1087 case Intrinsic::loongarch_lasx_xvslti_bu:
1088 case Intrinsic::loongarch_lasx_xvslti_hu:
1089 case Intrinsic::loongarch_lasx_xvslti_wu:
1090 case Intrinsic::loongarch_lasx_xvslti_du:
1091 case Intrinsic::loongarch_lasx_xvbsll_v:
1092 case Intrinsic::loongarch_lasx_xvbsrl_v:
1093 return checkIntrinsicImmArg<5>(Op, 2, DAG);
1094 case Intrinsic::loongarch_lsx_vseqi_b:
1095 case Intrinsic::loongarch_lsx_vseqi_h:
1096 case Intrinsic::loongarch_lsx_vseqi_w:
1097 case Intrinsic::loongarch_lsx_vseqi_d:
1098 case Intrinsic::loongarch_lsx_vslei_b:
1099 case Intrinsic::loongarch_lsx_vslei_h:
1100 case Intrinsic::loongarch_lsx_vslei_w:
1101 case Intrinsic::loongarch_lsx_vslei_d:
1102 case Intrinsic::loongarch_lsx_vslti_b:
1103 case Intrinsic::loongarch_lsx_vslti_h:
1104 case Intrinsic::loongarch_lsx_vslti_w:
1105 case Intrinsic::loongarch_lsx_vslti_d:
1106 case Intrinsic::loongarch_lasx_xvseqi_b:
1107 case Intrinsic::loongarch_lasx_xvseqi_h:
1108 case Intrinsic::loongarch_lasx_xvseqi_w:
1109 case Intrinsic::loongarch_lasx_xvseqi_d:
1110 case Intrinsic::loongarch_lasx_xvslei_b:
1111 case Intrinsic::loongarch_lasx_xvslei_h:
1112 case Intrinsic::loongarch_lasx_xvslei_w:
1113 case Intrinsic::loongarch_lasx_xvslei_d:
1114 case Intrinsic::loongarch_lasx_xvslti_b:
1115 case Intrinsic::loongarch_lasx_xvslti_h:
1116 case Intrinsic::loongarch_lasx_xvslti_w:
1117 case Intrinsic::loongarch_lasx_xvslti_d:
1118 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
1119 case Intrinsic::loongarch_lsx_vsrlni_h_w:
1120 case Intrinsic::loongarch_lsx_vsrani_h_w:
1121 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
1122 case Intrinsic::loongarch_lsx_vsrarni_h_w:
1123 case Intrinsic::loongarch_lsx_vssrlni_h_w:
1124 case Intrinsic::loongarch_lsx_vssrani_h_w:
1125 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
1126 case Intrinsic::loongarch_lsx_vssrani_hu_w:
1127 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
1128 case Intrinsic::loongarch_lsx_vssrarni_h_w:
1129 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
1130 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
1131 case Intrinsic::loongarch_lsx_vfrstpi_b:
1132 case Intrinsic::loongarch_lsx_vfrstpi_h:
1133 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
1134 case Intrinsic::loongarch_lasx_xvsrani_h_w:
1135 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
1136 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
1137 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
1138 case Intrinsic::loongarch_lasx_xvssrani_h_w:
1139 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
1140 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
1141 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
1142 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
1143 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
1144 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
1145 case Intrinsic::loongarch_lasx_xvfrstpi_b:
1146 case Intrinsic::loongarch_lasx_xvfrstpi_h:
1147 return checkIntrinsicImmArg<5>(Op, 3, DAG);
1148 case Intrinsic::loongarch_lsx_vsat_d:
1149 case Intrinsic::loongarch_lsx_vsat_du:
1150 case Intrinsic::loongarch_lsx_vrotri_d:
1151 case Intrinsic::loongarch_lsx_vsrlri_d:
1152 case Intrinsic::loongarch_lsx_vsrari_d:
1153 case Intrinsic::loongarch_lasx_xvsat_d:
1154 case Intrinsic::loongarch_lasx_xvsat_du:
1155 case Intrinsic::loongarch_lasx_xvrotri_d:
1156 case Intrinsic::loongarch_lasx_xvsrlri_d:
1157 case Intrinsic::loongarch_lasx_xvsrari_d:
1158 return checkIntrinsicImmArg<6>(Op, 2, DAG);
1159 case Intrinsic::loongarch_lsx_vsrlni_w_d:
1160 case Intrinsic::loongarch_lsx_vsrani_w_d:
1161 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
1162 case Intrinsic::loongarch_lsx_vsrarni_w_d:
1163 case Intrinsic::loongarch_lsx_vssrlni_w_d:
1164 case Intrinsic::loongarch_lsx_vssrani_w_d:
1165 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
1166 case Intrinsic::loongarch_lsx_vssrani_wu_d:
1167 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
1168 case Intrinsic::loongarch_lsx_vssrarni_w_d:
1169 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
1170 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
1171 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
1172 case Intrinsic::loongarch_lasx_xvsrani_w_d:
1173 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
1174 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
1175 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
1176 case Intrinsic::loongarch_lasx_xvssrani_w_d:
1177 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
1178 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
1179 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
1180 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
1181 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
1182 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
1183 return checkIntrinsicImmArg<6>(Op, 3, DAG);
1184 case Intrinsic::loongarch_lsx_vsrlni_d_q:
1185 case Intrinsic::loongarch_lsx_vsrani_d_q:
1186 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
1187 case Intrinsic::loongarch_lsx_vsrarni_d_q:
1188 case Intrinsic::loongarch_lsx_vssrlni_d_q:
1189 case Intrinsic::loongarch_lsx_vssrani_d_q:
1190 case Intrinsic::loongarch_lsx_vssrlni_du_q:
1191 case Intrinsic::loongarch_lsx_vssrani_du_q:
1192 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
1193 case Intrinsic::loongarch_lsx_vssrarni_d_q:
1194 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
1195 case Intrinsic::loongarch_lsx_vssrarni_du_q:
1196 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
1197 case Intrinsic::loongarch_lasx_xvsrani_d_q:
1198 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
1199 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
1200 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
1201 case Intrinsic::loongarch_lasx_xvssrani_d_q:
1202 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
1203 case Intrinsic::loongarch_lasx_xvssrani_du_q:
1204 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
1205 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
1206 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
1207 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
1208 return checkIntrinsicImmArg<7>(Op, 3, DAG);
1209 case Intrinsic::loongarch_lsx_vnori_b:
1210 case Intrinsic::loongarch_lsx_vshuf4i_b:
1211 case Intrinsic::loongarch_lsx_vshuf4i_h:
1212 case Intrinsic::loongarch_lsx_vshuf4i_w:
1213 case Intrinsic::loongarch_lasx_xvnori_b:
1214 case Intrinsic::loongarch_lasx_xvshuf4i_b:
1215 case Intrinsic::loongarch_lasx_xvshuf4i_h:
1216 case Intrinsic::loongarch_lasx_xvshuf4i_w:
1217 case Intrinsic::loongarch_lasx_xvpermi_d:
1218 return checkIntrinsicImmArg<8>(Op, 2, DAG);
1219 case Intrinsic::loongarch_lsx_vshuf4i_d:
1220 case Intrinsic::loongarch_lsx_vpermi_w:
1221 case Intrinsic::loongarch_lsx_vbitseli_b:
1222 case Intrinsic::loongarch_lsx_vextrins_b:
1223 case Intrinsic::loongarch_lsx_vextrins_h:
1224 case Intrinsic::loongarch_lsx_vextrins_w:
1225 case Intrinsic::loongarch_lsx_vextrins_d:
1226 case Intrinsic::loongarch_lasx_xvshuf4i_d:
1227 case Intrinsic::loongarch_lasx_xvpermi_w:
1228 case Intrinsic::loongarch_lasx_xvpermi_q:
1229 case Intrinsic::loongarch_lasx_xvbitseli_b:
1230 case Intrinsic::loongarch_lasx_xvextrins_b:
1231 case Intrinsic::loongarch_lasx_xvextrins_h:
1232 case Intrinsic::loongarch_lasx_xvextrins_w:
1233 case Intrinsic::loongarch_lasx_xvextrins_d:
1234 return checkIntrinsicImmArg<8>(Op, 3, DAG);
1235 case Intrinsic::loongarch_lsx_vrepli_b:
1236 case Intrinsic::loongarch_lsx_vrepli_h:
1237 case Intrinsic::loongarch_lsx_vrepli_w:
1238 case Intrinsic::loongarch_lsx_vrepli_d:
1239 case Intrinsic::loongarch_lasx_xvrepli_b:
1240 case Intrinsic::loongarch_lasx_xvrepli_h:
1241 case Intrinsic::loongarch_lasx_xvrepli_w:
1242 case Intrinsic::loongarch_lasx_xvrepli_d:
1243 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
1244 case Intrinsic::loongarch_lsx_vldi:
1245 case Intrinsic::loongarch_lasx_xvldi:
1246 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
1247 }
1248}
1249
1250// Helper function that emits an error message for intrinsics with a chain and
1251// returns the merge values of a UNDEF and the chain.
1252static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
1253 StringRef ErrorMsg,
1254 SelectionDAG &DAG) {
1255 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1256 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
1257 SDLoc(Op));
1258}
1259
1260SDValue
1261LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
1262 SelectionDAG &DAG) const {
1263 SDLoc DL(Op);
1264 MVT GRLenVT = Subtarget.getGRLenVT();
1265 EVT VT = Op.getValueType();
1266 SDValue Chain = Op.getOperand(0);
1267 const StringRef ErrorMsgOOR = "argument out of range";
1268 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1269 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1270
1271 switch (Op.getConstantOperandVal(1)) {
1272 default:
1273 return Op;
1274 case Intrinsic::loongarch_crc_w_b_w:
1275 case Intrinsic::loongarch_crc_w_h_w:
1276 case Intrinsic::loongarch_crc_w_w_w:
1277 case Intrinsic::loongarch_crc_w_d_w:
1278 case Intrinsic::loongarch_crcc_w_b_w:
1279 case Intrinsic::loongarch_crcc_w_h_w:
1280 case Intrinsic::loongarch_crcc_w_w_w:
1281 case Intrinsic::loongarch_crcc_w_d_w:
1282 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
1283 case Intrinsic::loongarch_csrrd_w:
1284 case Intrinsic::loongarch_csrrd_d: {
1285 unsigned Imm = Op.getConstantOperandVal(2);
1286 return !isUInt<14>(Imm)
1287 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1288 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
1289 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1290 }
1291 case Intrinsic::loongarch_csrwr_w:
1292 case Intrinsic::loongarch_csrwr_d: {
1293 unsigned Imm = Op.getConstantOperandVal(3);
1294 return !isUInt<14>(Imm)
1295 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1296 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
1297 {Chain, Op.getOperand(2),
1298 DAG.getConstant(Imm, DL, GRLenVT)});
1299 }
1300 case Intrinsic::loongarch_csrxchg_w:
1301 case Intrinsic::loongarch_csrxchg_d: {
1302 unsigned Imm = Op.getConstantOperandVal(4);
1303 return !isUInt<14>(Imm)
1304 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1305 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
1306 {Chain, Op.getOperand(2), Op.getOperand(3),
1307 DAG.getConstant(Imm, DL, GRLenVT)});
1308 }
1309 case Intrinsic::loongarch_iocsrrd_d: {
1310 return DAG.getNode(
1311 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
1312 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
1313 }
1314#define IOCSRRD_CASE(NAME, NODE) \
1315 case Intrinsic::loongarch_##NAME: { \
1316 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
1317 {Chain, Op.getOperand(2)}); \
1318 }
1319 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
1320 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
1321 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
1322#undef IOCSRRD_CASE
1323 case Intrinsic::loongarch_cpucfg: {
1324 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
1325 {Chain, Op.getOperand(2)});
1326 }
1327 case Intrinsic::loongarch_lddir_d: {
1328 unsigned Imm = Op.getConstantOperandVal(3);
1329 return !isUInt<8>(Imm)
1330 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1331 : Op;
1332 }
1333 case Intrinsic::loongarch_movfcsr2gr: {
1334 if (!Subtarget.hasBasicF())
1335 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
1336 unsigned Imm = Op.getConstantOperandVal(2);
1337 return !isUInt<2>(Imm)
1338 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1339 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
1340 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1341 }
1342 case Intrinsic::loongarch_lsx_vld:
1343 case Intrinsic::loongarch_lsx_vldrepl_b:
1344 case Intrinsic::loongarch_lasx_xvld:
1345 case Intrinsic::loongarch_lasx_xvldrepl_b:
1346 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1347 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1348 : SDValue();
1349 case Intrinsic::loongarch_lsx_vldrepl_h:
1350 case Intrinsic::loongarch_lasx_xvldrepl_h:
1351 return !isShiftedInt<11, 1>(
1352 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1354 Op, "argument out of range or not a multiple of 2", DAG)
1355 : SDValue();
1356 case Intrinsic::loongarch_lsx_vldrepl_w:
1357 case Intrinsic::loongarch_lasx_xvldrepl_w:
1358 return !isShiftedInt<10, 2>(
1359 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1361 Op, "argument out of range or not a multiple of 4", DAG)
1362 : SDValue();
1363 case Intrinsic::loongarch_lsx_vldrepl_d:
1364 case Intrinsic::loongarch_lasx_xvldrepl_d:
1365 return !isShiftedInt<9, 3>(
1366 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1368 Op, "argument out of range or not a multiple of 8", DAG)
1369 : SDValue();
1370 }
1371}
1372
1373// Helper function that emits an error message for intrinsics with a void
1374// return value and returns the chain.
1375static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
1376 SelectionDAG &DAG) {
1377
1378 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1379 return Op.getOperand(0);
1380}
1381
1382SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
1383 SelectionDAG &DAG) const {
1384 SDLoc DL(Op);
1385 MVT GRLenVT = Subtarget.getGRLenVT();
1386 SDValue Chain = Op.getOperand(0);
1387 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
1388 SDValue Op2 = Op.getOperand(2);
1389 const StringRef ErrorMsgOOR = "argument out of range";
1390 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1391 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
1392 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1393
1394 switch (IntrinsicEnum) {
1395 default:
1396 // TODO: Add more Intrinsics.
1397 return SDValue();
1398 case Intrinsic::loongarch_cacop_d:
1399 case Intrinsic::loongarch_cacop_w: {
1400 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
1401 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
1402 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
1403 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
1404 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
1405 unsigned Imm1 = Op2->getAsZExtVal();
1406 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
1407 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
1408 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
1409 return Op;
1410 }
1411 case Intrinsic::loongarch_dbar: {
1412 unsigned Imm = Op2->getAsZExtVal();
1413 return !isUInt<15>(Imm)
1414 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1415 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
1416 DAG.getConstant(Imm, DL, GRLenVT));
1417 }
1418 case Intrinsic::loongarch_ibar: {
1419 unsigned Imm = Op2->getAsZExtVal();
1420 return !isUInt<15>(Imm)
1421 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1422 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
1423 DAG.getConstant(Imm, DL, GRLenVT));
1424 }
1425 case Intrinsic::loongarch_break: {
1426 unsigned Imm = Op2->getAsZExtVal();
1427 return !isUInt<15>(Imm)
1428 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1429 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
1430 DAG.getConstant(Imm, DL, GRLenVT));
1431 }
1432 case Intrinsic::loongarch_movgr2fcsr: {
1433 if (!Subtarget.hasBasicF())
1434 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
1435 unsigned Imm = Op2->getAsZExtVal();
1436 return !isUInt<2>(Imm)
1437 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1438 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
1439 DAG.getConstant(Imm, DL, GRLenVT),
1440 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
1441 Op.getOperand(3)));
1442 }
1443 case Intrinsic::loongarch_syscall: {
1444 unsigned Imm = Op2->getAsZExtVal();
1445 return !isUInt<15>(Imm)
1446 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1447 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
1448 DAG.getConstant(Imm, DL, GRLenVT));
1449 }
1450#define IOCSRWR_CASE(NAME, NODE) \
1451 case Intrinsic::loongarch_##NAME: { \
1452 SDValue Op3 = Op.getOperand(3); \
1453 return Subtarget.is64Bit() \
1454 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
1455 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1456 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
1457 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
1458 Op3); \
1459 }
1460 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
1461 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
1462 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
1463#undef IOCSRWR_CASE
1464 case Intrinsic::loongarch_iocsrwr_d: {
1465 return !Subtarget.is64Bit()
1466 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1467 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
1468 Op2,
1469 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1470 Op.getOperand(3)));
1471 }
1472#define ASRT_LE_GT_CASE(NAME) \
1473 case Intrinsic::loongarch_##NAME: { \
1474 return !Subtarget.is64Bit() \
1475 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
1476 : Op; \
1477 }
1478 ASRT_LE_GT_CASE(asrtle_d)
1479 ASRT_LE_GT_CASE(asrtgt_d)
1480#undef ASRT_LE_GT_CASE
1481 case Intrinsic::loongarch_ldpte_d: {
1482 unsigned Imm = Op.getConstantOperandVal(3);
1483 return !Subtarget.is64Bit()
1484 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1485 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1486 : Op;
1487 }
1488 case Intrinsic::loongarch_lsx_vst:
1489 case Intrinsic::loongarch_lasx_xvst:
1490 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
1491 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1492 : SDValue();
1493 case Intrinsic::loongarch_lasx_xvstelm_b:
1494 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1495 !isUInt<5>(Op.getConstantOperandVal(5)))
1496 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1497 : SDValue();
1498 case Intrinsic::loongarch_lsx_vstelm_b:
1499 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1500 !isUInt<4>(Op.getConstantOperandVal(5)))
1501 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1502 : SDValue();
1503 case Intrinsic::loongarch_lasx_xvstelm_h:
1504 return (!isShiftedInt<8, 1>(
1505 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1506 !isUInt<4>(Op.getConstantOperandVal(5)))
1508 Op, "argument out of range or not a multiple of 2", DAG)
1509 : SDValue();
1510 case Intrinsic::loongarch_lsx_vstelm_h:
1511 return (!isShiftedInt<8, 1>(
1512 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1513 !isUInt<3>(Op.getConstantOperandVal(5)))
1515 Op, "argument out of range or not a multiple of 2", DAG)
1516 : SDValue();
1517 case Intrinsic::loongarch_lasx_xvstelm_w:
1518 return (!isShiftedInt<8, 2>(
1519 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1520 !isUInt<3>(Op.getConstantOperandVal(5)))
1522 Op, "argument out of range or not a multiple of 4", DAG)
1523 : SDValue();
1524 case Intrinsic::loongarch_lsx_vstelm_w:
1525 return (!isShiftedInt<8, 2>(
1526 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1527 !isUInt<2>(Op.getConstantOperandVal(5)))
1529 Op, "argument out of range or not a multiple of 4", DAG)
1530 : SDValue();
1531 case Intrinsic::loongarch_lasx_xvstelm_d:
1532 return (!isShiftedInt<8, 3>(
1533 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1534 !isUInt<2>(Op.getConstantOperandVal(5)))
1536 Op, "argument out of range or not a multiple of 8", DAG)
1537 : SDValue();
1538 case Intrinsic::loongarch_lsx_vstelm_d:
1539 return (!isShiftedInt<8, 3>(
1540 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1541 !isUInt<1>(Op.getConstantOperandVal(5)))
1543 Op, "argument out of range or not a multiple of 8", DAG)
1544 : SDValue();
1545 }
1546}
1547
1548SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
1549 SelectionDAG &DAG) const {
1550 SDLoc DL(Op);
1551 SDValue Lo = Op.getOperand(0);
1552 SDValue Hi = Op.getOperand(1);
1553 SDValue Shamt = Op.getOperand(2);
1554 EVT VT = Lo.getValueType();
1555
1556 // if Shamt-GRLen < 0: // Shamt < GRLen
1557 // Lo = Lo << Shamt
1558 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
1559 // else:
1560 // Lo = 0
1561 // Hi = Lo << (Shamt-GRLen)
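  // Illustrative worked example (not from the original source), taking
  // GRLen == 32: for Shamt == 8 the "if" branch gives Lo = Lo << 8 and
  // Hi = (Hi << 8) | (Lo >>u 24); for Shamt == 40 the "else" branch gives
  // Lo = 0 and Hi = Lo << 8 (the original low word shifted by 40 - 32).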
1562
1563 SDValue Zero = DAG.getConstant(0, DL, VT);
1564 SDValue One = DAG.getConstant(1, DL, VT);
1565 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1566 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1567 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1568 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1569
1570 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1571 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1572 SDValue ShiftRightLo =
1573 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
1574 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1575 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1576 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
1577
1578 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1579
1580 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1581 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1582
1583 SDValue Parts[2] = {Lo, Hi};
1584 return DAG.getMergeValues(Parts, DL);
1585}
1586
1587SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
1588 SelectionDAG &DAG,
1589 bool IsSRA) const {
1590 SDLoc DL(Op);
1591 SDValue Lo = Op.getOperand(0);
1592 SDValue Hi = Op.getOperand(1);
1593 SDValue Shamt = Op.getOperand(2);
1594 EVT VT = Lo.getValueType();
1595
1596 // SRA expansion:
1597 // if Shamt-GRLen < 0: // Shamt < GRLen
1598 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1599 // Hi = Hi >>s Shamt
1600 // else:
1601 // Lo = Hi >>s (Shamt-GRLen);
1602 // Hi = Hi >>s (GRLen-1)
1603 //
1604 // SRL expansion:
1605 // if Shamt-GRLen < 0: // Shamt < GRLen
1606 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1607 // Hi = Hi >>u Shamt
1608 // else:
1609 // Lo = Hi >>u (Shamt-GRLen);
1610 // Hi = 0;
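  // Illustrative worked example (not from the original source), taking
  // GRLen == 32 and SRL: for Shamt == 8 the "if" branch gives
  // Lo = (Lo >>u 8) | (Hi << 24) and Hi = Hi >>u 8; for Shamt == 40 the
  // "else" branch gives Lo = Hi >>u 8 and Hi = 0.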
1611
1612 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1613
1614 SDValue Zero = DAG.getConstant(0, DL, VT);
1615 SDValue One = DAG.getConstant(1, DL, VT);
1616 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1617 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1618 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1619 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1620
1621 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1622 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1623 SDValue ShiftLeftHi =
1624 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
1625 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1626 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1627 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
1628 SDValue HiFalse =
1629 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
1630
1631 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1632
1633 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1634 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1635
1636 SDValue Parts[2] = {Lo, Hi};
1637 return DAG.getMergeValues(Parts, DL);
1638}
1639
1640// Returns the opcode of the target-specific SDNode that implements the 32-bit
1641// form of the given Opcode.
1642static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
1643 switch (Opcode) {
1644 default:
1645 llvm_unreachable("Unexpected opcode");
1646 case ISD::SHL:
1647 return LoongArchISD::SLL_W;
1648 case ISD::SRA:
1649 return LoongArchISD::SRA_W;
1650 case ISD::SRL:
1651 return LoongArchISD::SRL_W;
1652 case ISD::ROTR:
1653 return LoongArchISD::ROTR_W;
1654 case ISD::ROTL:
1655 return LoongArchISD::ROTL_W;
1656 case ISD::CTTZ:
1657 return LoongArchISD::CTZ_W;
1658 case ISD::CTLZ:
1659 return LoongArchISD::CLZ_W;
1660 }
1661}
1662
1663// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
1664// node. Because i8/i16/i32 aren't legal types for LA64, these operations would
1665// otherwise be promoted to i64, making it difficult to select the
1666// SLL_W/.../*W nodes later on, because the fact that the operation was
1667// originally of type i8/i16/i32 is lost.
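// For instance (illustrative, not from the original source), an i32 shl with
// a non-constant amount on LA64 is rewritten here as
//   (trunc i32 (LoongArchISD::SLL_W (any_ext i64 x), (any_ext i64 y)))
// so that instruction selection can still pick the 32-bit sll.w form.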
1669 unsigned ExtOpc = ISD::ANY_EXTEND) {
1670 SDLoc DL(N);
1671 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
1672 SDValue NewOp0, NewRes;
1673
1674 switch (NumOp) {
1675 default:
1676 llvm_unreachable("Unexpected NumOp");
1677 case 1: {
1678 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1679 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
1680 break;
1681 }
1682 case 2: {
1683 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1684 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1685 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1686 break;
1687 }
1688 // TODO: Handle more NumOp.
1689 }
1690
1691 // ReplaceNodeResults requires we maintain the same type for the return
1692 // value.
1693 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1694}
1695
1696// Helper function that emits an error message for intrinsics with or without
1697// a chain, and returns a UNDEF and (if present) the chain as the results.
1700 StringRef ErrorMsg, bool WithChain = true) {
1701 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
1702 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
1703 if (!WithChain)
1704 return;
1705 Results.push_back(N->getOperand(0));
1706}
1707
1708template <unsigned N>
1709static void
1711 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
1712 unsigned ResOp) {
1713 const StringRef ErrorMsgOOR = "argument out of range";
1714 unsigned Imm = Node->getConstantOperandVal(2);
1715 if (!isUInt<N>(Imm)) {
1717 /*WithChain=*/false);
1718 return;
1719 }
1720 SDLoc DL(Node);
1721 SDValue Vec = Node->getOperand(1);
1722
1723 SDValue PickElt =
1724 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
1725 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
1727 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
1728 PickElt.getValue(0)));
1729}
1730
1733 SelectionDAG &DAG,
1734 const LoongArchSubtarget &Subtarget,
1735 unsigned ResOp) {
1736 SDLoc DL(N);
1737 SDValue Vec = N->getOperand(1);
1738
1739 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
1740 Results.push_back(
1741 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
1742}
1743
1744static void
1746 SelectionDAG &DAG,
1747 const LoongArchSubtarget &Subtarget) {
1748 switch (N->getConstantOperandVal(0)) {
1749 default:
1750 llvm_unreachable("Unexpected Intrinsic.");
1751 case Intrinsic::loongarch_lsx_vpickve2gr_b:
1752 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1754 break;
1755 case Intrinsic::loongarch_lsx_vpickve2gr_h:
1756 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
1757 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1759 break;
1760 case Intrinsic::loongarch_lsx_vpickve2gr_w:
1761 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1763 break;
1764 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
1765 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1767 break;
1768 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
1769 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
1770 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1772 break;
1773 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
1774 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1776 break;
1777 case Intrinsic::loongarch_lsx_bz_b:
1778 case Intrinsic::loongarch_lsx_bz_h:
1779 case Intrinsic::loongarch_lsx_bz_w:
1780 case Intrinsic::loongarch_lsx_bz_d:
1781 case Intrinsic::loongarch_lasx_xbz_b:
1782 case Intrinsic::loongarch_lasx_xbz_h:
1783 case Intrinsic::loongarch_lasx_xbz_w:
1784 case Intrinsic::loongarch_lasx_xbz_d:
1785 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1786 LoongArchISD::VANY_ZERO);
1787 break;
1788 case Intrinsic::loongarch_lsx_bz_v:
1789 case Intrinsic::loongarch_lasx_xbz_v:
1790 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1791 LoongArchISD::VALL_ZERO);
1792 break;
1793 case Intrinsic::loongarch_lsx_bnz_b:
1794 case Intrinsic::loongarch_lsx_bnz_h:
1795 case Intrinsic::loongarch_lsx_bnz_w:
1796 case Intrinsic::loongarch_lsx_bnz_d:
1797 case Intrinsic::loongarch_lasx_xbnz_b:
1798 case Intrinsic::loongarch_lasx_xbnz_h:
1799 case Intrinsic::loongarch_lasx_xbnz_w:
1800 case Intrinsic::loongarch_lasx_xbnz_d:
1801 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1802 LoongArchISD::VALL_NONZERO);
1803 break;
1804 case Intrinsic::loongarch_lsx_bnz_v:
1805 case Intrinsic::loongarch_lasx_xbnz_v:
1806 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1807 LoongArchISD::VANY_NONZERO);
1808 break;
1809 }
1810}
1811
1812void LoongArchTargetLowering::ReplaceNodeResults(
1813 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1814 SDLoc DL(N);
1815 EVT VT = N->getValueType(0);
1816 switch (N->getOpcode()) {
1817 default:
1818 llvm_unreachable("Don't know how to legalize this operation");
1819 case ISD::SHL:
1820 case ISD::SRA:
1821 case ISD::SRL:
1822 case ISD::ROTR:
1823 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1824 "Unexpected custom legalisation");
1825 if (N->getOperand(1).getOpcode() != ISD::Constant) {
1826 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1827 break;
1828 }
1829 break;
1830 case ISD::ROTL:
1831 ConstantSDNode *CN;
1832 if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
1833 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1834 break;
1835 }
1836 break;
1837 case ISD::FP_TO_SINT: {
1838 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1839 "Unexpected custom legalisation");
1840 SDValue Src = N->getOperand(0);
1841 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1842 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1843 TargetLowering::TypeSoftenFloat) {
1844 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1845 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1846 return;
1847 }
1848 // If the FP type needs to be softened, emit a library call using the 'si'
1849 // version. If we left it to default legalization we'd end up with 'di'.
1850 RTLIB::Libcall LC;
1851 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1852 MakeLibCallOptions CallOptions;
1853 EVT OpVT = Src.getValueType();
1854 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1855 SDValue Chain = SDValue();
1856 SDValue Result;
1857 std::tie(Result, Chain) =
1858 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1859 Results.push_back(Result);
1860 break;
1861 }
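    // Illustrative note (annotation): when the source FP type must be softened
    // (e.g. fptosi double -> i32 on LA64 without the 'd' feature), the code
    // above emits the 'si'-suffixed libcall such as __fixdfsi instead of the
    // __fixdfdi call that default legalization of the widened result would use.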
1862 case ISD::BITCAST: {
1863 SDValue Src = N->getOperand(0);
1864 EVT SrcVT = Src.getValueType();
1865 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1866 Subtarget.hasBasicF()) {
1867 SDValue Dst =
1868 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1869 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1870 }
1871 break;
1872 }
1873 case ISD::FP_TO_UINT: {
1874 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1875 "Unexpected custom legalisation");
1876 auto &TLI = DAG.getTargetLoweringInfo();
1877 SDValue Tmp1, Tmp2;
1878 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1879 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1880 break;
1881 }
1882 case ISD::BSWAP: {
1883 SDValue Src = N->getOperand(0);
1884 assert((VT == MVT::i16 || VT == MVT::i32) &&
1885 "Unexpected custom legalization");
1886 MVT GRLenVT = Subtarget.getGRLenVT();
1887 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1888 SDValue Tmp;
1889 switch (VT.getSizeInBits()) {
1890 default:
1891 llvm_unreachable("Unexpected operand width");
1892 case 16:
1893 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1894 break;
1895 case 32:
1896 // Only LA64 will get to here due to the size mismatch between VT and
1897 // GRLenVT; LA32 lowering is directly defined in LoongArchInstrInfo.
1898 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1899 break;
1900 }
1901 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1902 break;
1903 }
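    // Illustrative note (annotation): on LA64, (bswap i32 %a) is any-extended
    // to i64, byte-swapped with REVB_2W (which reverses the bytes inside each
    // 32-bit half of the register), and truncated back to i32, keeping only
    // the swapped low word.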
1904 case ISD::BITREVERSE: {
1905 SDValue Src = N->getOperand(0);
1906 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1907 "Unexpected custom legalization");
1908 MVT GRLenVT = Subtarget.getGRLenVT();
1909 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1910 SDValue Tmp;
1911 switch (VT.getSizeInBits()) {
1912 default:
1913 llvm_unreachable("Unexpected operand width");
1914 case 8:
1915 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
1916 break;
1917 case 32:
1918 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
1919 break;
1920 }
1921 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1922 break;
1923 }
1924 case ISD::CTLZ:
1925 case ISD::CTTZ: {
1926 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1927 "Unexpected custom legalisation");
1928 Results.push_back(customLegalizeToWOp(N, DAG, 1));
1929 break;
1930 }
1931 case ISD::INTRINSIC_W_CHAIN: {
1932 SDValue Chain = N->getOperand(0);
1933 SDValue Op2 = N->getOperand(2);
1934 MVT GRLenVT = Subtarget.getGRLenVT();
1935 const StringRef ErrorMsgOOR = "argument out of range";
1936 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1937 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1938
1939 switch (N->getConstantOperandVal(1)) {
1940 default:
1941 llvm_unreachable("Unexpected Intrinsic.");
1942 case Intrinsic::loongarch_movfcsr2gr: {
1943 if (!Subtarget.hasBasicF()) {
1944 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
1945 return;
1946 }
1947 unsigned Imm = Op2->getAsZExtVal();
1948 if (!isUInt<2>(Imm)) {
1949 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1950 return;
1951 }
1952 SDValue MOVFCSR2GRResults = DAG.getNode(
1953 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
1954 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1955 Results.push_back(
1956 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
1957 Results.push_back(MOVFCSR2GRResults.getValue(1));
1958 break;
1959 }
1960#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
1961 case Intrinsic::loongarch_##NAME: { \
1962 SDValue NODE = DAG.getNode( \
1963 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1964 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1965 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
1966 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
1967 Results.push_back(NODE.getValue(1)); \
1968 break; \
1969 }
1970 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1971 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1972 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1973 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1974 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1975 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1976#undef CRC_CASE_EXT_BINARYOP
1977
1978#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
1979 case Intrinsic::loongarch_##NAME: { \
1980 SDValue NODE = DAG.getNode( \
1981 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1982 {Chain, Op2, \
1983 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
1984 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
1985 Results.push_back(NODE.getValue(1)); \
1986 break; \
1987 }
1988 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
1989 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
1990#undef CRC_CASE_EXT_UNARYOP
1991#define CSR_CASE(ID) \
1992 case Intrinsic::loongarch_##ID: { \
1993 if (!Subtarget.is64Bit()) \
1994 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
1995 break; \
1996 }
1997 CSR_CASE(csrrd_d);
1998 CSR_CASE(csrwr_d);
1999 CSR_CASE(csrxchg_d);
2000 CSR_CASE(iocsrrd_d);
2001#undef CSR_CASE
2002 case Intrinsic::loongarch_csrrd_w: {
2003 unsigned Imm = Op2->getAsZExtVal();
2004 if (!isUInt<14>(Imm)) {
2005 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2006 return;
2007 }
2008 SDValue CSRRDResults =
2009 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2010 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2011 Results.push_back(
2012 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
2013 Results.push_back(CSRRDResults.getValue(1));
2014 break;
2015 }
2016 case Intrinsic::loongarch_csrwr_w: {
2017 unsigned Imm = N->getConstantOperandVal(3);
2018 if (!isUInt<14>(Imm)) {
2019 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2020 return;
2021 }
2022 SDValue CSRWRResults =
2023 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2024 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2025 DAG.getConstant(Imm, DL, GRLenVT)});
2026 Results.push_back(
2027 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
2028 Results.push_back(CSRWRResults.getValue(1));
2029 break;
2030 }
2031 case Intrinsic::loongarch_csrxchg_w: {
2032 unsigned Imm = N->getConstantOperandVal(4);
2033 if (!isUInt<14>(Imm)) {
2034 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2035 return;
2036 }
2037 SDValue CSRXCHGResults = DAG.getNode(
2038 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2039 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2040 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
2041 DAG.getConstant(Imm, DL, GRLenVT)});
2042 Results.push_back(
2043 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
2044 Results.push_back(CSRXCHGResults.getValue(1));
2045 break;
2046 }
2047#define IOCSRRD_CASE(NAME, NODE) \
2048 case Intrinsic::loongarch_##NAME: { \
2049 SDValue IOCSRRDResults = \
2050 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2051 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
2052 Results.push_back( \
2053 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
2054 Results.push_back(IOCSRRDResults.getValue(1)); \
2055 break; \
2056 }
2057 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2058 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2059 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2060#undef IOCSRRD_CASE
2061 case Intrinsic::loongarch_cpucfg: {
2062 SDValue CPUCFGResults =
2063 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2064 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
2065 Results.push_back(
2066 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
2067 Results.push_back(CPUCFGResults.getValue(1));
2068 break;
2069 }
2070 case Intrinsic::loongarch_lddir_d: {
2071 if (!Subtarget.is64Bit()) {
2072 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
2073 return;
2074 }
2075 break;
2076 }
2077 }
2078 break;
2079 }
2080 case ISD::READ_REGISTER: {
2081 if (Subtarget.is64Bit())
2082 DAG.getContext()->emitError(
2083 "On LA64, only 64-bit registers can be read.");
2084 else
2085 DAG.getContext()->emitError(
2086 "On LA32, only 32-bit registers can be read.");
2087 Results.push_back(DAG.getUNDEF(VT));
2088 Results.push_back(N->getOperand(0));
2089 break;
2090 }
2091 case ISD::INTRINSIC_WO_CHAIN: {
2092 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
2093 break;
2094 }
2095 }
2096}
2097
2098static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
2099 TargetLowering::DAGCombinerInfo &DCI,
2100 const LoongArchSubtarget &Subtarget) {
2101 if (DCI.isBeforeLegalizeOps())
2102 return SDValue();
2103
2104 SDValue FirstOperand = N->getOperand(0);
2105 SDValue SecondOperand = N->getOperand(1);
2106 unsigned FirstOperandOpc = FirstOperand.getOpcode();
2107 EVT ValTy = N->getValueType(0);
2108 SDLoc DL(N);
2109 uint64_t lsb, msb;
2110 unsigned SMIdx, SMLen;
2111 ConstantSDNode *CN;
2112 SDValue NewOperand;
2113 MVT GRLenVT = Subtarget.getGRLenVT();
2114
2115 // Op's second operand must be a shifted mask.
2116 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
2117 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
2118 return SDValue();
2119
2120 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
2121 // Pattern match BSTRPICK.
2122 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
2123 // => BSTRPICK $dst, $src, msb, lsb
2124 // where msb = lsb + len - 1
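    // For example (annotation, not in the upstream source): with lsb = 8 and
    // len = 8, (srl $src, 8) & 0xff becomes BSTRPICK $dst, $src, 15, 8.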
2125
2126 // The second operand of the shift must be an immediate.
2127 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
2128 return SDValue();
2129
2130 lsb = CN->getZExtValue();
2131
2132 // Return if the shifted mask does not start at bit 0 or the sum of its
2133 // length and lsb exceeds the word's size.
2134 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
2135 return SDValue();
2136
2137 NewOperand = FirstOperand.getOperand(0);
2138 } else {
2139 // Pattern match BSTRPICK.
2140 // $dst = and $src, (2**len - 1), if len > 12
2141 // => BSTRPICK $dst, $src, msb, lsb
2142 // where lsb = 0 and msb = len - 1
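    // For example (annotation, not in the upstream source): $src & 0x3ffff
    // (len = 18 > 12) becomes BSTRPICK $dst, $src, 17, 0; a shifted mask such
    // as 0x1ff0 instead yields BSTRPICK $dst, $src, 12, 4 followed by
    // SLLI $dst, $dst, 4.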
2143
2144 // If the mask is <= 0xfff, andi can be used instead.
2145 if (CN->getZExtValue() <= 0xfff)
2146 return SDValue();
2147
2148 // Return if the mask's MSB exceeds the word's size.
2149 if (SMIdx + SMLen > ValTy.getSizeInBits())
2150 return SDValue();
2151
2152 if (SMIdx > 0) {
2153 // Omit if the constant has more than 2 uses. This is a conservative
2154 // decision. Whether it is a win depends on the HW microarchitecture.
2155 // However it should always be better for 1 and 2 uses.
2156 if (CN->use_size() > 2)
2157 return SDValue();
2158 // Return if the constant can be composed by a single LU12I.W.
2159 if ((CN->getZExtValue() & 0xfff) == 0)
2160 return SDValue();
2161 // Return if the constant can be composed by a single ADDI with
2162 // the zero register.
2163 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
2164 return SDValue();
2165 }
2166
2167 lsb = SMIdx;
2168 NewOperand = FirstOperand;
2169 }
2170
2171 msb = lsb + SMLen - 1;
2172 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
2173 DAG.getConstant(msb, DL, GRLenVT),
2174 DAG.getConstant(lsb, DL, GRLenVT));
2175 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
2176 return NR0;
2177 // Try to optimize to
2178 // bstrpick $Rd, $Rs, msb, lsb
2179 // slli $Rd, $Rd, lsb
2180 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
2181 DAG.getConstant(lsb, DL, GRLenVT));
2182}
2183
2184static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
2185 TargetLowering::DAGCombinerInfo &DCI,
2186 const LoongArchSubtarget &Subtarget) {
2187 if (DCI.isBeforeLegalizeOps())
2188 return SDValue();
2189
2190 // $dst = srl (and $src, Mask), Shamt
2191 // =>
2192 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
2193 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
2194 //
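  // For example (annotation, not in the upstream source):
  //   (srl (and $src, 0xff00), 8)  =>  BSTRPICK $dst, $src, 15, 8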
2195
2196 SDValue FirstOperand = N->getOperand(0);
2197 ConstantSDNode *CN;
2198 EVT ValTy = N->getValueType(0);
2199 SDLoc DL(N);
2200 MVT GRLenVT = Subtarget.getGRLenVT();
2201 unsigned MaskIdx, MaskLen;
2202 uint64_t Shamt;
2203
2204 // The first operand must be an AND and the second operand of the AND must be
2205 // a shifted mask.
2206 if (FirstOperand.getOpcode() != ISD::AND ||
2207 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
2208 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
2209 return SDValue();
2210
2211 // The second operand (shift amount) must be an immediate.
2212 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
2213 return SDValue();
2214
2215 Shamt = CN->getZExtValue();
2216 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
2217 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
2218 FirstOperand->getOperand(0),
2219 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2220 DAG.getConstant(Shamt, DL, GRLenVT));
2221
2222 return SDValue();
2223}
2224
2225static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
2226 TargetLowering::DAGCombinerInfo &DCI,
2227 const LoongArchSubtarget &Subtarget) {
2228 MVT GRLenVT = Subtarget.getGRLenVT();
2229 EVT ValTy = N->getValueType(0);
2230 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2231 ConstantSDNode *CN0, *CN1;
2232 SDLoc DL(N);
2233 unsigned ValBits = ValTy.getSizeInBits();
2234 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
2235 unsigned Shamt;
2236 bool SwapAndRetried = false;
2237
2238 if (DCI.isBeforeLegalizeOps())
2239 return SDValue();
2240
2241 if (ValBits != 32 && ValBits != 64)
2242 return SDValue();
2243
2244Retry:
2245 // 1st pattern to match BSTRINS:
2246 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
2247 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
2248 // =>
2249 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
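  // For example (annotation, not in the upstream source), with lsb = 8 and
  // size = 8 on i32:
  //   (X & 0xFFFF00FF) | ((Y << 8) & 0xFF00)  =>  BSTRINS X, Y, 15, 8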
2250 if (N0.getOpcode() == ISD::AND &&
2251 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2252 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2253 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
2254 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2255 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2256 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
2257 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2258 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2259 (MaskIdx0 + MaskLen0 <= ValBits)) {
2260 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
2261 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2262 N1.getOperand(0).getOperand(0),
2263 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2264 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2265 }
2266
2267 // 2nd pattern to match BSTRINS:
2268 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
2269 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
2270 // =>
2271 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
2272 if (N0.getOpcode() == ISD::AND &&
2273 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2274 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2275 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2276 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2277 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2278 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2279 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2280 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
2281 (MaskIdx0 + MaskLen0 <= ValBits)) {
2282 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
2283 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2284 N1.getOperand(0).getOperand(0),
2285 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2286 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2287 }
2288
2289 // 3rd pattern to match BSTRINS:
2290 // R = or (and X, mask0), (and Y, mask1)
2291 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
2292 // =>
2293 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
2294 // where msb = lsb + size - 1
2295 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
2296 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2297 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2298 (MaskIdx0 + MaskLen0 <= 64) &&
2299 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
2300 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2301 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
2302 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2303 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
2304 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
2305 DAG.getConstant(ValBits == 32
2306 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2307 : (MaskIdx0 + MaskLen0 - 1),
2308 DL, GRLenVT),
2309 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2310 }
2311
2312 // 4th pattern to match BSTRINS:
2313 // R = or (and X, mask), (shl Y, shamt)
2314 // where mask = (2**shamt - 1)
2315 // =>
2316 // R = BSTRINS X, Y, ValBits - 1, shamt
2317 // where ValBits = 32 or 64
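  // For example (annotation): on i32 with shamt = 4,
  //   (X & 0xF) | (Y << 4)  =>  BSTRINS X, Y, 31, 4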
2318 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
2319 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2320 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
2321 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2322 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
2323 (MaskIdx0 + MaskLen0 <= ValBits)) {
2324 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
2325 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2326 N1.getOperand(0),
2327 DAG.getConstant((ValBits - 1), DL, GRLenVT),
2328 DAG.getConstant(Shamt, DL, GRLenVT));
2329 }
2330
2331 // 5th pattern to match BSTRINS:
2332 // R = or (and X, mask), const
2333 // where ~mask = (2**size - 1) << lsb, mask & const = 0
2334 // =>
2335 // R = BSTRINS X, (const >> lsb), msb, lsb
2336 // where msb = lsb + size - 1
2337 if (N0.getOpcode() == ISD::AND &&
2338 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2339 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2340 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
2341 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2342 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
2343 return DAG.getNode(
2344 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2345 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
2346 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2347 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2348 }
2349
2350 // 6th pattern.
2351 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
2352 // by the incoming bits are known to be zero.
2353 // =>
2354 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
2355 //
2356 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
2357 // pattern is more common than the 1st. So we put the 1st before the 6th in
2358 // order to match as many nodes as possible.
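  // For example (annotation, not in the upstream source): if bits 15:8 of b
  // are known to be zero, b | ((c & 0xFF) << 8) becomes BSTRINS b, c, 15, 8
  // even though b carries no explicit mask.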
2359 ConstantSDNode *CNMask, *CNShamt;
2360 unsigned MaskIdx, MaskLen;
2361 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2362 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2363 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2364 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2365 CNShamt->getZExtValue() + MaskLen <= ValBits) {
2366 Shamt = CNShamt->getZExtValue();
2367 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
2368 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2369 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
2370 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2371 N1.getOperand(0).getOperand(0),
2372 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
2373 DAG.getConstant(Shamt, DL, GRLenVT));
2374 }
2375 }
2376
2377 // 7th pattern.
2378 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
2379 // overwritten by the incoming bits are known to be zero.
2380 // =>
2381 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
2382 //
2383 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
2384 // before the 7th in order to match as many nodes as possible.
2385 if (N1.getOpcode() == ISD::AND &&
2386 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2387 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2388 N1.getOperand(0).getOpcode() == ISD::SHL &&
2389 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2390 CNShamt->getZExtValue() == MaskIdx) {
2391 APInt ShMask(ValBits, CNMask->getZExtValue());
2392 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2393 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
2394 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2395 N1.getOperand(0).getOperand(0),
2396 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2397 DAG.getConstant(MaskIdx, DL, GRLenVT));
2398 }
2399 }
2400
2401 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
2402 if (!SwapAndRetried) {
2403 std::swap(N0, N1);
2404 SwapAndRetried = true;
2405 goto Retry;
2406 }
2407
2408 SwapAndRetried = false;
2409Retry2:
2410 // 8th pattern.
2411 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
2412 // the incoming bits are known to be zero.
2413 // =>
2414 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
2415 //
2416 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
2417 // we put it here in order to match as many nodes as possible or generate
2418 // fewer instructions.
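  // For example (annotation): if bits 19:12 of b are known to be zero,
  //   b | (c & 0xFF000)  =>  BSTRINS b, (c >> 12), 19, 12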
2419 if (N1.getOpcode() == ISD::AND &&
2420 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2421 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
2422 APInt ShMask(ValBits, CNMask->getZExtValue());
2423 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2424 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
2425 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2426 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
2427 N1->getOperand(0),
2428 DAG.getConstant(MaskIdx, DL, GRLenVT)),
2429 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2430 DAG.getConstant(MaskIdx, DL, GRLenVT));
2431 }
2432 }
2433 // Swap N0/N1 and retry.
2434 if (!SwapAndRetried) {
2435 std::swap(N0, N1);
2436 SwapAndRetried = true;
2437 goto Retry2;
2438 }
2439
2440 return SDValue();
2441}
2442
2443// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
2444static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
2445 TargetLowering::DAGCombinerInfo &DCI,
2446 const LoongArchSubtarget &Subtarget) {
2447 if (DCI.isBeforeLegalizeOps())
2448 return SDValue();
2449
2450 SDValue Src = N->getOperand(0);
2451 if (Src.getOpcode() != LoongArchISD::REVB_2W)
2452 return SDValue();
2453
2454 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
2455 Src.getOperand(0));
2456}
2457
2458template <unsigned N>
2459static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
2460 SelectionDAG &DAG,
2461 const LoongArchSubtarget &Subtarget,
2462 bool IsSigned = false) {
2463 SDLoc DL(Node);
2464 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2465 // Check the ImmArg.
2466 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2467 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2468 DAG.getContext()->emitError(Node->getOperationName(0) +
2469 ": argument out of range.");
2470 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
2471 }
2472 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
2473}
2474
2475template <unsigned N>
2476static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
2477 SelectionDAG &DAG, bool IsSigned = false) {
2478 SDLoc DL(Node);
2479 EVT ResTy = Node->getValueType(0);
2480 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2481
2482 // Check the ImmArg.
2483 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2484 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2485 DAG.getContext()->emitError(Node->getOperationName(0) +
2486 ": argument out of range.");
2487 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2488 }
2489 return DAG.getConstant(
2490 APInt(ResTy.getScalarType().getSizeInBits(),
2491 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
2492 DL, ResTy);
2493}
2494
2495static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
2496 SDLoc DL(Node);
2497 EVT ResTy = Node->getValueType(0);
2498 SDValue Vec = Node->getOperand(2);
2499 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
2500 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
2501}
2502
2503static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
2504 SDLoc DL(Node);
2505 EVT ResTy = Node->getValueType(0);
2506 SDValue One = DAG.getConstant(1, DL, ResTy);
2507 SDValue Bit =
2508 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
2509
2510 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
2511 DAG.getNOT(DL, Bit, ResTy));
2512}
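// Illustrative note (annotation): lowerVectorBitClear expands vbitclr as
//   dst = src & ~(1 << (amt % EltBits))
// per element, e.g. vbitclr.b clears bit (amt & 7) in every byte lane.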
2513
2514template <unsigned N>
2515static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
2516 SDLoc DL(Node);
2517 EVT ResTy = Node->getValueType(0);
2518 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2519 // Check the unsigned ImmArg.
2520 if (!isUInt<N>(CImm->getZExtValue())) {
2521 DAG.getContext()->emitError(Node->getOperationName(0) +
2522 ": argument out of range.");
2523 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2524 }
2525
2526 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2527 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
2528
2529 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
2530}
2531
2532template <unsigned N>
2533static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
2534 SDLoc DL(Node);
2535 EVT ResTy = Node->getValueType(0);
2536 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2537 // Check the unsigned ImmArg.
2538 if (!isUInt<N>(CImm->getZExtValue())) {
2539 DAG.getContext()->emitError(Node->getOperationName(0) +
2540 ": argument out of range.");
2541 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2542 }
2543
2544 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2545 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2546 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
2547}
2548
2549template <unsigned N>
2550static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
2551 SDLoc DL(Node);
2552 EVT ResTy = Node->getValueType(0);
2553 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2554 // Check the unsigned ImmArg.
2555 if (!isUInt<N>(CImm->getZExtValue())) {
2556 DAG.getContext()->emitError(Node->getOperationName(0) +
2557 ": argument out of range.");
2558 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2559 }
2560
2561 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2562 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2563 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
2564}
2565
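// Illustrative note (annotation, not part of the upstream source): the combine
// below rewrites simple LSX/LASX intrinsics into generic ISD nodes so that the
// ordinary DAG combines and instruction patterns apply, e.g.
//   llvm.loongarch.lsx.vadd.w(%a, %b)  ->  (add v4i32 %a, %b)
//   llvm.loongarch.lsx.vslli.w(%a, 3)  ->  (shl v4i32 %a, splat(3))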
2566static SDValue
2567performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
2568 TargetLowering::DAGCombinerInfo &DCI,
2569 const LoongArchSubtarget &Subtarget) {
2570 SDLoc DL(N);
2571 switch (N->getConstantOperandVal(0)) {
2572 default:
2573 break;
2574 case Intrinsic::loongarch_lsx_vadd_b:
2575 case Intrinsic::loongarch_lsx_vadd_h:
2576 case Intrinsic::loongarch_lsx_vadd_w:
2577 case Intrinsic::loongarch_lsx_vadd_d:
2578 case Intrinsic::loongarch_lasx_xvadd_b:
2579 case Intrinsic::loongarch_lasx_xvadd_h:
2580 case Intrinsic::loongarch_lasx_xvadd_w:
2581 case Intrinsic::loongarch_lasx_xvadd_d:
2582 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2583 N->getOperand(2));
2584 case Intrinsic::loongarch_lsx_vaddi_bu:
2585 case Intrinsic::loongarch_lsx_vaddi_hu:
2586 case Intrinsic::loongarch_lsx_vaddi_wu:
2587 case Intrinsic::loongarch_lsx_vaddi_du:
2588 case Intrinsic::loongarch_lasx_xvaddi_bu:
2589 case Intrinsic::loongarch_lasx_xvaddi_hu:
2590 case Intrinsic::loongarch_lasx_xvaddi_wu:
2591 case Intrinsic::loongarch_lasx_xvaddi_du:
2592 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2593 lowerVectorSplatImm<5>(N, 2, DAG));
2594 case Intrinsic::loongarch_lsx_vsub_b:
2595 case Intrinsic::loongarch_lsx_vsub_h:
2596 case Intrinsic::loongarch_lsx_vsub_w:
2597 case Intrinsic::loongarch_lsx_vsub_d:
2598 case Intrinsic::loongarch_lasx_xvsub_b:
2599 case Intrinsic::loongarch_lasx_xvsub_h:
2600 case Intrinsic::loongarch_lasx_xvsub_w:
2601 case Intrinsic::loongarch_lasx_xvsub_d:
2602 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2603 N->getOperand(2));
2604 case Intrinsic::loongarch_lsx_vsubi_bu:
2605 case Intrinsic::loongarch_lsx_vsubi_hu:
2606 case Intrinsic::loongarch_lsx_vsubi_wu:
2607 case Intrinsic::loongarch_lsx_vsubi_du:
2608 case Intrinsic::loongarch_lasx_xvsubi_bu:
2609 case Intrinsic::loongarch_lasx_xvsubi_hu:
2610 case Intrinsic::loongarch_lasx_xvsubi_wu:
2611 case Intrinsic::loongarch_lasx_xvsubi_du:
2612 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2613 lowerVectorSplatImm<5>(N, 2, DAG));
2614 case Intrinsic::loongarch_lsx_vneg_b:
2615 case Intrinsic::loongarch_lsx_vneg_h:
2616 case Intrinsic::loongarch_lsx_vneg_w:
2617 case Intrinsic::loongarch_lsx_vneg_d:
2618 case Intrinsic::loongarch_lasx_xvneg_b:
2619 case Intrinsic::loongarch_lasx_xvneg_h:
2620 case Intrinsic::loongarch_lasx_xvneg_w:
2621 case Intrinsic::loongarch_lasx_xvneg_d:
2622 return DAG.getNode(
2623 ISD::SUB, DL, N->getValueType(0),
2624 DAG.getConstant(
2625 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
2626 /*isSigned=*/true),
2627 SDLoc(N), N->getValueType(0)),
2628 N->getOperand(1));
2629 case Intrinsic::loongarch_lsx_vmax_b:
2630 case Intrinsic::loongarch_lsx_vmax_h:
2631 case Intrinsic::loongarch_lsx_vmax_w:
2632 case Intrinsic::loongarch_lsx_vmax_d:
2633 case Intrinsic::loongarch_lasx_xvmax_b:
2634 case Intrinsic::loongarch_lasx_xvmax_h:
2635 case Intrinsic::loongarch_lasx_xvmax_w:
2636 case Intrinsic::loongarch_lasx_xvmax_d:
2637 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2638 N->getOperand(2));
2639 case Intrinsic::loongarch_lsx_vmax_bu:
2640 case Intrinsic::loongarch_lsx_vmax_hu:
2641 case Intrinsic::loongarch_lsx_vmax_wu:
2642 case Intrinsic::loongarch_lsx_vmax_du:
2643 case Intrinsic::loongarch_lasx_xvmax_bu:
2644 case Intrinsic::loongarch_lasx_xvmax_hu:
2645 case Intrinsic::loongarch_lasx_xvmax_wu:
2646 case Intrinsic::loongarch_lasx_xvmax_du:
2647 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2648 N->getOperand(2));
2649 case Intrinsic::loongarch_lsx_vmaxi_b:
2650 case Intrinsic::loongarch_lsx_vmaxi_h:
2651 case Intrinsic::loongarch_lsx_vmaxi_w:
2652 case Intrinsic::loongarch_lsx_vmaxi_d:
2653 case Intrinsic::loongarch_lasx_xvmaxi_b:
2654 case Intrinsic::loongarch_lasx_xvmaxi_h:
2655 case Intrinsic::loongarch_lasx_xvmaxi_w:
2656 case Intrinsic::loongarch_lasx_xvmaxi_d:
2657 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2658 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2659 case Intrinsic::loongarch_lsx_vmaxi_bu:
2660 case Intrinsic::loongarch_lsx_vmaxi_hu:
2661 case Intrinsic::loongarch_lsx_vmaxi_wu:
2662 case Intrinsic::loongarch_lsx_vmaxi_du:
2663 case Intrinsic::loongarch_lasx_xvmaxi_bu:
2664 case Intrinsic::loongarch_lasx_xvmaxi_hu:
2665 case Intrinsic::loongarch_lasx_xvmaxi_wu:
2666 case Intrinsic::loongarch_lasx_xvmaxi_du:
2667 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2668 lowerVectorSplatImm<5>(N, 2, DAG));
2669 case Intrinsic::loongarch_lsx_vmin_b:
2670 case Intrinsic::loongarch_lsx_vmin_h:
2671 case Intrinsic::loongarch_lsx_vmin_w:
2672 case Intrinsic::loongarch_lsx_vmin_d:
2673 case Intrinsic::loongarch_lasx_xvmin_b:
2674 case Intrinsic::loongarch_lasx_xvmin_h:
2675 case Intrinsic::loongarch_lasx_xvmin_w:
2676 case Intrinsic::loongarch_lasx_xvmin_d:
2677 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2678 N->getOperand(2));
2679 case Intrinsic::loongarch_lsx_vmin_bu:
2680 case Intrinsic::loongarch_lsx_vmin_hu:
2681 case Intrinsic::loongarch_lsx_vmin_wu:
2682 case Intrinsic::loongarch_lsx_vmin_du:
2683 case Intrinsic::loongarch_lasx_xvmin_bu:
2684 case Intrinsic::loongarch_lasx_xvmin_hu:
2685 case Intrinsic::loongarch_lasx_xvmin_wu:
2686 case Intrinsic::loongarch_lasx_xvmin_du:
2687 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2688 N->getOperand(2));
2689 case Intrinsic::loongarch_lsx_vmini_b:
2690 case Intrinsic::loongarch_lsx_vmini_h:
2691 case Intrinsic::loongarch_lsx_vmini_w:
2692 case Intrinsic::loongarch_lsx_vmini_d:
2693 case Intrinsic::loongarch_lasx_xvmini_b:
2694 case Intrinsic::loongarch_lasx_xvmini_h:
2695 case Intrinsic::loongarch_lasx_xvmini_w:
2696 case Intrinsic::loongarch_lasx_xvmini_d:
2697 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2698 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2699 case Intrinsic::loongarch_lsx_vmini_bu:
2700 case Intrinsic::loongarch_lsx_vmini_hu:
2701 case Intrinsic::loongarch_lsx_vmini_wu:
2702 case Intrinsic::loongarch_lsx_vmini_du:
2703 case Intrinsic::loongarch_lasx_xvmini_bu:
2704 case Intrinsic::loongarch_lasx_xvmini_hu:
2705 case Intrinsic::loongarch_lasx_xvmini_wu:
2706 case Intrinsic::loongarch_lasx_xvmini_du:
2707 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2708 lowerVectorSplatImm<5>(N, 2, DAG));
2709 case Intrinsic::loongarch_lsx_vmul_b:
2710 case Intrinsic::loongarch_lsx_vmul_h:
2711 case Intrinsic::loongarch_lsx_vmul_w:
2712 case Intrinsic::loongarch_lsx_vmul_d:
2713 case Intrinsic::loongarch_lasx_xvmul_b:
2714 case Intrinsic::loongarch_lasx_xvmul_h:
2715 case Intrinsic::loongarch_lasx_xvmul_w:
2716 case Intrinsic::loongarch_lasx_xvmul_d:
2717 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
2718 N->getOperand(2));
2719 case Intrinsic::loongarch_lsx_vmadd_b:
2720 case Intrinsic::loongarch_lsx_vmadd_h:
2721 case Intrinsic::loongarch_lsx_vmadd_w:
2722 case Intrinsic::loongarch_lsx_vmadd_d:
2723 case Intrinsic::loongarch_lasx_xvmadd_b:
2724 case Intrinsic::loongarch_lasx_xvmadd_h:
2725 case Intrinsic::loongarch_lasx_xvmadd_w:
2726 case Intrinsic::loongarch_lasx_xvmadd_d: {
2727 EVT ResTy = N->getValueType(0);
2728 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
2729 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2730 N->getOperand(3)));
2731 }
2732 case Intrinsic::loongarch_lsx_vmsub_b:
2733 case Intrinsic::loongarch_lsx_vmsub_h:
2734 case Intrinsic::loongarch_lsx_vmsub_w:
2735 case Intrinsic::loongarch_lsx_vmsub_d:
2736 case Intrinsic::loongarch_lasx_xvmsub_b:
2737 case Intrinsic::loongarch_lasx_xvmsub_h:
2738 case Intrinsic::loongarch_lasx_xvmsub_w:
2739 case Intrinsic::loongarch_lasx_xvmsub_d: {
2740 EVT ResTy = N->getValueType(0);
2741 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
2742 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2743 N->getOperand(3)));
2744 }
2745 case Intrinsic::loongarch_lsx_vdiv_b:
2746 case Intrinsic::loongarch_lsx_vdiv_h:
2747 case Intrinsic::loongarch_lsx_vdiv_w:
2748 case Intrinsic::loongarch_lsx_vdiv_d:
2749 case Intrinsic::loongarch_lasx_xvdiv_b:
2750 case Intrinsic::loongarch_lasx_xvdiv_h:
2751 case Intrinsic::loongarch_lasx_xvdiv_w:
2752 case Intrinsic::loongarch_lasx_xvdiv_d:
2753 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
2754 N->getOperand(2));
2755 case Intrinsic::loongarch_lsx_vdiv_bu:
2756 case Intrinsic::loongarch_lsx_vdiv_hu:
2757 case Intrinsic::loongarch_lsx_vdiv_wu:
2758 case Intrinsic::loongarch_lsx_vdiv_du:
2759 case Intrinsic::loongarch_lasx_xvdiv_bu:
2760 case Intrinsic::loongarch_lasx_xvdiv_hu:
2761 case Intrinsic::loongarch_lasx_xvdiv_wu:
2762 case Intrinsic::loongarch_lasx_xvdiv_du:
2763 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
2764 N->getOperand(2));
2765 case Intrinsic::loongarch_lsx_vmod_b:
2766 case Intrinsic::loongarch_lsx_vmod_h:
2767 case Intrinsic::loongarch_lsx_vmod_w:
2768 case Intrinsic::loongarch_lsx_vmod_d:
2769 case Intrinsic::loongarch_lasx_xvmod_b:
2770 case Intrinsic::loongarch_lasx_xvmod_h:
2771 case Intrinsic::loongarch_lasx_xvmod_w:
2772 case Intrinsic::loongarch_lasx_xvmod_d:
2773 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
2774 N->getOperand(2));
2775 case Intrinsic::loongarch_lsx_vmod_bu:
2776 case Intrinsic::loongarch_lsx_vmod_hu:
2777 case Intrinsic::loongarch_lsx_vmod_wu:
2778 case Intrinsic::loongarch_lsx_vmod_du:
2779 case Intrinsic::loongarch_lasx_xvmod_bu:
2780 case Intrinsic::loongarch_lasx_xvmod_hu:
2781 case Intrinsic::loongarch_lasx_xvmod_wu:
2782 case Intrinsic::loongarch_lasx_xvmod_du:
2783 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
2784 N->getOperand(2));
2785 case Intrinsic::loongarch_lsx_vand_v:
2786 case Intrinsic::loongarch_lasx_xvand_v:
2787 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2788 N->getOperand(2));
2789 case Intrinsic::loongarch_lsx_vor_v:
2790 case Intrinsic::loongarch_lasx_xvor_v:
2791 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2792 N->getOperand(2));
2793 case Intrinsic::loongarch_lsx_vxor_v:
2794 case Intrinsic::loongarch_lasx_xvxor_v:
2795 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2796 N->getOperand(2));
2797 case Intrinsic::loongarch_lsx_vnor_v:
2798 case Intrinsic::loongarch_lasx_xvnor_v: {
2799 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2800 N->getOperand(2));
2801 return DAG.getNOT(DL, Res, Res->getValueType(0));
2802 }
2803 case Intrinsic::loongarch_lsx_vandi_b:
2804 case Intrinsic::loongarch_lasx_xvandi_b:
2805 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2806 lowerVectorSplatImm<8>(N, 2, DAG));
2807 case Intrinsic::loongarch_lsx_vori_b:
2808 case Intrinsic::loongarch_lasx_xvori_b:
2809 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2810 lowerVectorSplatImm<8>(N, 2, DAG));
2811 case Intrinsic::loongarch_lsx_vxori_b:
2812 case Intrinsic::loongarch_lasx_xvxori_b:
2813 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2814 lowerVectorSplatImm<8>(N, 2, DAG));
2815 case Intrinsic::loongarch_lsx_vsll_b:
2816 case Intrinsic::loongarch_lsx_vsll_h:
2817 case Intrinsic::loongarch_lsx_vsll_w:
2818 case Intrinsic::loongarch_lsx_vsll_d:
2819 case Intrinsic::loongarch_lasx_xvsll_b:
2820 case Intrinsic::loongarch_lasx_xvsll_h:
2821 case Intrinsic::loongarch_lasx_xvsll_w:
2822 case Intrinsic::loongarch_lasx_xvsll_d:
2823 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2824 truncateVecElts(N, DAG));
2825 case Intrinsic::loongarch_lsx_vslli_b:
2826 case Intrinsic::loongarch_lasx_xvslli_b:
2827 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2828 lowerVectorSplatImm<3>(N, 2, DAG));
2829 case Intrinsic::loongarch_lsx_vslli_h:
2830 case Intrinsic::loongarch_lasx_xvslli_h:
2831 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2832 lowerVectorSplatImm<4>(N, 2, DAG));
2833 case Intrinsic::loongarch_lsx_vslli_w:
2834 case Intrinsic::loongarch_lasx_xvslli_w:
2835 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2836 lowerVectorSplatImm<5>(N, 2, DAG));
2837 case Intrinsic::loongarch_lsx_vslli_d:
2838 case Intrinsic::loongarch_lasx_xvslli_d:
2839 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2840 lowerVectorSplatImm<6>(N, 2, DAG));
2841 case Intrinsic::loongarch_lsx_vsrl_b:
2842 case Intrinsic::loongarch_lsx_vsrl_h:
2843 case Intrinsic::loongarch_lsx_vsrl_w:
2844 case Intrinsic::loongarch_lsx_vsrl_d:
2845 case Intrinsic::loongarch_lasx_xvsrl_b:
2846 case Intrinsic::loongarch_lasx_xvsrl_h:
2847 case Intrinsic::loongarch_lasx_xvsrl_w:
2848 case Intrinsic::loongarch_lasx_xvsrl_d:
2849 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2850 truncateVecElts(N, DAG));
2851 case Intrinsic::loongarch_lsx_vsrli_b:
2852 case Intrinsic::loongarch_lasx_xvsrli_b:
2853 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2854 lowerVectorSplatImm<3>(N, 2, DAG));
2855 case Intrinsic::loongarch_lsx_vsrli_h:
2856 case Intrinsic::loongarch_lasx_xvsrli_h:
2857 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2858 lowerVectorSplatImm<4>(N, 2, DAG));
2859 case Intrinsic::loongarch_lsx_vsrli_w:
2860 case Intrinsic::loongarch_lasx_xvsrli_w:
2861 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2862 lowerVectorSplatImm<5>(N, 2, DAG));
2863 case Intrinsic::loongarch_lsx_vsrli_d:
2864 case Intrinsic::loongarch_lasx_xvsrli_d:
2865 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2866 lowerVectorSplatImm<6>(N, 2, DAG));
2867 case Intrinsic::loongarch_lsx_vsra_b:
2868 case Intrinsic::loongarch_lsx_vsra_h:
2869 case Intrinsic::loongarch_lsx_vsra_w:
2870 case Intrinsic::loongarch_lsx_vsra_d:
2871 case Intrinsic::loongarch_lasx_xvsra_b:
2872 case Intrinsic::loongarch_lasx_xvsra_h:
2873 case Intrinsic::loongarch_lasx_xvsra_w:
2874 case Intrinsic::loongarch_lasx_xvsra_d:
2875 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2876 truncateVecElts(N, DAG));
2877 case Intrinsic::loongarch_lsx_vsrai_b:
2878 case Intrinsic::loongarch_lasx_xvsrai_b:
2879 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2880 lowerVectorSplatImm<3>(N, 2, DAG));
2881 case Intrinsic::loongarch_lsx_vsrai_h:
2882 case Intrinsic::loongarch_lasx_xvsrai_h:
2883 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2884 lowerVectorSplatImm<4>(N, 2, DAG));
2885 case Intrinsic::loongarch_lsx_vsrai_w:
2886 case Intrinsic::loongarch_lasx_xvsrai_w:
2887 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2888 lowerVectorSplatImm<5>(N, 2, DAG));
2889 case Intrinsic::loongarch_lsx_vsrai_d:
2890 case Intrinsic::loongarch_lasx_xvsrai_d:
2891 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2892 lowerVectorSplatImm<6>(N, 2, DAG));
2893 case Intrinsic::loongarch_lsx_vclz_b:
2894 case Intrinsic::loongarch_lsx_vclz_h:
2895 case Intrinsic::loongarch_lsx_vclz_w:
2896 case Intrinsic::loongarch_lsx_vclz_d:
2897 case Intrinsic::loongarch_lasx_xvclz_b:
2898 case Intrinsic::loongarch_lasx_xvclz_h:
2899 case Intrinsic::loongarch_lasx_xvclz_w:
2900 case Intrinsic::loongarch_lasx_xvclz_d:
2901 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
2902 case Intrinsic::loongarch_lsx_vpcnt_b:
2903 case Intrinsic::loongarch_lsx_vpcnt_h:
2904 case Intrinsic::loongarch_lsx_vpcnt_w:
2905 case Intrinsic::loongarch_lsx_vpcnt_d:
2906 case Intrinsic::loongarch_lasx_xvpcnt_b:
2907 case Intrinsic::loongarch_lasx_xvpcnt_h:
2908 case Intrinsic::loongarch_lasx_xvpcnt_w:
2909 case Intrinsic::loongarch_lasx_xvpcnt_d:
2910 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
2911 case Intrinsic::loongarch_lsx_vbitclr_b:
2912 case Intrinsic::loongarch_lsx_vbitclr_h:
2913 case Intrinsic::loongarch_lsx_vbitclr_w:
2914 case Intrinsic::loongarch_lsx_vbitclr_d:
2915 case Intrinsic::loongarch_lasx_xvbitclr_b:
2916 case Intrinsic::loongarch_lasx_xvbitclr_h:
2917 case Intrinsic::loongarch_lasx_xvbitclr_w:
2918 case Intrinsic::loongarch_lasx_xvbitclr_d:
2919 return lowerVectorBitClear(N, DAG);
2920 case Intrinsic::loongarch_lsx_vbitclri_b:
2921 case Intrinsic::loongarch_lasx_xvbitclri_b:
2922 return lowerVectorBitClearImm<3>(N, DAG);
2923 case Intrinsic::loongarch_lsx_vbitclri_h:
2924 case Intrinsic::loongarch_lasx_xvbitclri_h:
2925 return lowerVectorBitClearImm<4>(N, DAG);
2926 case Intrinsic::loongarch_lsx_vbitclri_w:
2927 case Intrinsic::loongarch_lasx_xvbitclri_w:
2928 return lowerVectorBitClearImm<5>(N, DAG);
2929 case Intrinsic::loongarch_lsx_vbitclri_d:
2930 case Intrinsic::loongarch_lasx_xvbitclri_d:
2931 return lowerVectorBitClearImm<6>(N, DAG);
2932 case Intrinsic::loongarch_lsx_vbitset_b:
2933 case Intrinsic::loongarch_lsx_vbitset_h:
2934 case Intrinsic::loongarch_lsx_vbitset_w:
2935 case Intrinsic::loongarch_lsx_vbitset_d:
2936 case Intrinsic::loongarch_lasx_xvbitset_b:
2937 case Intrinsic::loongarch_lasx_xvbitset_h:
2938 case Intrinsic::loongarch_lasx_xvbitset_w:
2939 case Intrinsic::loongarch_lasx_xvbitset_d: {
2940 EVT VecTy = N->getValueType(0);
2941 SDValue One = DAG.getConstant(1, DL, VecTy);
2942 return DAG.getNode(
2943 ISD::OR, DL, VecTy, N->getOperand(1),
2944 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2945 }
2946 case Intrinsic::loongarch_lsx_vbitseti_b:
2947 case Intrinsic::loongarch_lasx_xvbitseti_b:
2948 return lowerVectorBitSetImm<3>(N, DAG);
2949 case Intrinsic::loongarch_lsx_vbitseti_h:
2950 case Intrinsic::loongarch_lasx_xvbitseti_h:
2951 return lowerVectorBitSetImm<4>(N, DAG);
2952 case Intrinsic::loongarch_lsx_vbitseti_w:
2953 case Intrinsic::loongarch_lasx_xvbitseti_w:
2954 return lowerVectorBitSetImm<5>(N, DAG);
2955 case Intrinsic::loongarch_lsx_vbitseti_d:
2956 case Intrinsic::loongarch_lasx_xvbitseti_d:
2957 return lowerVectorBitSetImm<6>(N, DAG);
2958 case Intrinsic::loongarch_lsx_vbitrev_b:
2959 case Intrinsic::loongarch_lsx_vbitrev_h:
2960 case Intrinsic::loongarch_lsx_vbitrev_w:
2961 case Intrinsic::loongarch_lsx_vbitrev_d:
2962 case Intrinsic::loongarch_lasx_xvbitrev_b:
2963 case Intrinsic::loongarch_lasx_xvbitrev_h:
2964 case Intrinsic::loongarch_lasx_xvbitrev_w:
2965 case Intrinsic::loongarch_lasx_xvbitrev_d: {
2966 EVT VecTy = N->getValueType(0);
2967 SDValue One = DAG.getConstant(1, DL, VecTy);
2968 return DAG.getNode(
2969 ISD::XOR, DL, VecTy, N->getOperand(1),
2970 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2971 }
2972 case Intrinsic::loongarch_lsx_vbitrevi_b:
2973 case Intrinsic::loongarch_lasx_xvbitrevi_b:
2974 return lowerVectorBitRevImm<3>(N, DAG);
2975 case Intrinsic::loongarch_lsx_vbitrevi_h:
2976 case Intrinsic::loongarch_lasx_xvbitrevi_h:
2977 return lowerVectorBitRevImm<4>(N, DAG);
2978 case Intrinsic::loongarch_lsx_vbitrevi_w:
2979 case Intrinsic::loongarch_lasx_xvbitrevi_w:
2980 return lowerVectorBitRevImm<5>(N, DAG);
2981 case Intrinsic::loongarch_lsx_vbitrevi_d:
2982 case Intrinsic::loongarch_lasx_xvbitrevi_d:
2983 return lowerVectorBitRevImm<6>(N, DAG);
2984 case Intrinsic::loongarch_lsx_vfadd_s:
2985 case Intrinsic::loongarch_lsx_vfadd_d:
2986 case Intrinsic::loongarch_lasx_xvfadd_s:
2987 case Intrinsic::loongarch_lasx_xvfadd_d:
2988 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
2989 N->getOperand(2));
2990 case Intrinsic::loongarch_lsx_vfsub_s:
2991 case Intrinsic::loongarch_lsx_vfsub_d:
2992 case Intrinsic::loongarch_lasx_xvfsub_s:
2993 case Intrinsic::loongarch_lasx_xvfsub_d:
2994 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
2995 N->getOperand(2));
2996 case Intrinsic::loongarch_lsx_vfmul_s:
2997 case Intrinsic::loongarch_lsx_vfmul_d:
2998 case Intrinsic::loongarch_lasx_xvfmul_s:
2999 case Intrinsic::loongarch_lasx_xvfmul_d:
3000 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
3001 N->getOperand(2));
3002 case Intrinsic::loongarch_lsx_vfdiv_s:
3003 case Intrinsic::loongarch_lsx_vfdiv_d:
3004 case Intrinsic::loongarch_lasx_xvfdiv_s:
3005 case Intrinsic::loongarch_lasx_xvfdiv_d:
3006 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
3007 N->getOperand(2));
3008 case Intrinsic::loongarch_lsx_vfmadd_s:
3009 case Intrinsic::loongarch_lsx_vfmadd_d:
3010 case Intrinsic::loongarch_lasx_xvfmadd_s:
3011 case Intrinsic::loongarch_lasx_xvfmadd_d:
3012 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
3013 N->getOperand(2), N->getOperand(3));
3014 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
3015 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3016 N->getOperand(1), N->getOperand(2),
3017 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
3018 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
3019 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
3020 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3021 N->getOperand(1), N->getOperand(2),
3022 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
3023 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
3024 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
3025 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3026 N->getOperand(1), N->getOperand(2),
3027 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
3028 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
3029 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3030 N->getOperand(1), N->getOperand(2),
3031 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
3032 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
3033 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
3034 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
3035 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
3036 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
3037 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
3038 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
3039 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
3040 EVT ResTy = N->getValueType(0);
3041 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
3042 return DAG.getBuildVector(ResTy, DL, Ops);
3043 }
3044 case Intrinsic::loongarch_lsx_vreplve_b:
3045 case Intrinsic::loongarch_lsx_vreplve_h:
3046 case Intrinsic::loongarch_lsx_vreplve_w:
3047 case Intrinsic::loongarch_lsx_vreplve_d:
3048 case Intrinsic::loongarch_lasx_xvreplve_b:
3049 case Intrinsic::loongarch_lasx_xvreplve_h:
3050 case Intrinsic::loongarch_lasx_xvreplve_w:
3051 case Intrinsic::loongarch_lasx_xvreplve_d:
3052 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
3053 N->getOperand(1),
3054 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
3055 N->getOperand(2)));
3056 }
3057 return SDValue();
3058}
3059
3060SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
3061 DAGCombinerInfo &DCI) const {
3062 SelectionDAG &DAG = DCI.DAG;
3063 switch (N->getOpcode()) {
3064 default:
3065 break;
3066 case ISD::AND:
3067 return performANDCombine(N, DAG, DCI, Subtarget);
3068 case ISD::OR:
3069 return performORCombine(N, DAG, DCI, Subtarget);
3070 case ISD::SRL:
3071 return performSRLCombine(N, DAG, DCI, Subtarget);
3072 case LoongArchISD::BITREV_W:
3073 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
3074 case ISD::INTRINSIC_WO_CHAIN:
3075 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
3076 }
3077 return SDValue();
3078}
3079
3080static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
3081 MachineBasicBlock *MBB) {
3082 if (!ZeroDivCheck)
3083 return MBB;
3084
3085 // Build instructions:
3086 // MBB:
3087 // div(or mod) $dst, $dividend, $divisor
3088 // bnez $divisor, SinkMBB
3089 // BreakMBB:
3090 // break 7 // BRK_DIVZERO
3091 // SinkMBB:
3092 // fallthrough
3093 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
3094 MachineFunction::iterator It = ++MBB->getIterator();
3095 MachineFunction *MF = MBB->getParent();
3096 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3097 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3098 MF->insert(It, BreakMBB);
3099 MF->insert(It, SinkMBB);
3100
3101 // Transfer the remainder of MBB and its successor edges to SinkMBB.
3102 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
3103 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
3104
3105 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
3106 DebugLoc DL = MI.getDebugLoc();
3107 MachineOperand &Divisor = MI.getOperand(2);
3108 Register DivisorReg = Divisor.getReg();
3109
3110 // MBB:
3111 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
3112 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
3113 .addMBB(SinkMBB);
3114 MBB->addSuccessor(BreakMBB);
3115 MBB->addSuccessor(SinkMBB);
3116
3117 // BreakMBB:
3118 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
3119 // definition of BRK_DIVZERO.
3120 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
3121 BreakMBB->addSuccessor(SinkMBB);
3122
3123 // Clear Divisor's kill flag.
3124 Divisor.setIsKill(false);
3125
3126 return SinkMBB;
3127}
3128
3129static MachineBasicBlock *
3130emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
3131 const LoongArchSubtarget &Subtarget) {
3132 unsigned CondOpc;
3133 switch (MI.getOpcode()) {
3134 default:
3135 llvm_unreachable("Unexpected opcode");
3136 case LoongArch::PseudoVBZ:
3137 CondOpc = LoongArch::VSETEQZ_V;
3138 break;
3139 case LoongArch::PseudoVBZ_B:
3140 CondOpc = LoongArch::VSETANYEQZ_B;
3141 break;
3142 case LoongArch::PseudoVBZ_H:
3143 CondOpc = LoongArch::VSETANYEQZ_H;
3144 break;
3145 case LoongArch::PseudoVBZ_W:
3146 CondOpc = LoongArch::VSETANYEQZ_W;
3147 break;
3148 case LoongArch::PseudoVBZ_D:
3149 CondOpc = LoongArch::VSETANYEQZ_D;
3150 break;
3151 case LoongArch::PseudoVBNZ:
3152 CondOpc = LoongArch::VSETNEZ_V;
3153 break;
3154 case LoongArch::PseudoVBNZ_B:
3155 CondOpc = LoongArch::VSETALLNEZ_B;
3156 break;
3157 case LoongArch::PseudoVBNZ_H:
3158 CondOpc = LoongArch::VSETALLNEZ_H;
3159 break;
3160 case LoongArch::PseudoVBNZ_W:
3161 CondOpc = LoongArch::VSETALLNEZ_W;
3162 break;
3163 case LoongArch::PseudoVBNZ_D:
3164 CondOpc = LoongArch::VSETALLNEZ_D;
3165 break;
3166 case LoongArch::PseudoXVBZ:
3167 CondOpc = LoongArch::XVSETEQZ_V;
3168 break;
3169 case LoongArch::PseudoXVBZ_B:
3170 CondOpc = LoongArch::XVSETANYEQZ_B;
3171 break;
3172 case LoongArch::PseudoXVBZ_H:
3173 CondOpc = LoongArch::XVSETANYEQZ_H;
3174 break;
3175 case LoongArch::PseudoXVBZ_W:
3176 CondOpc = LoongArch::XVSETANYEQZ_W;
3177 break;
3178 case LoongArch::PseudoXVBZ_D:
3179 CondOpc = LoongArch::XVSETANYEQZ_D;
3180 break;
3181 case LoongArch::PseudoXVBNZ:
3182 CondOpc = LoongArch::XVSETNEZ_V;
3183 break;
3184 case LoongArch::PseudoXVBNZ_B:
3185 CondOpc = LoongArch::XVSETALLNEZ_B;
3186 break;
3187 case LoongArch::PseudoXVBNZ_H:
3188 CondOpc = LoongArch::XVSETALLNEZ_H;
3189 break;
3190 case LoongArch::PseudoXVBNZ_W:
3191 CondOpc = LoongArch::XVSETALLNEZ_W;
3192 break;
3193 case LoongArch::PseudoXVBNZ_D:
3194 CondOpc = LoongArch::XVSETALLNEZ_D;
3195 break;
3196 }
3197
3198 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3199 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3200 DebugLoc DL = MI.getDebugLoc();
3201 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3202 MachineFunction::iterator It = ++BB->getIterator();
3203
3204 MachineFunction *F = BB->getParent();
3205 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
3206 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
3207 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
3208
3209 F->insert(It, FalseBB);
3210 F->insert(It, TrueBB);
3211 F->insert(It, SinkBB);
3212
3213 // Transfer the remainder of MBB and its successor edges to Sink.
3214 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
3215 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
3216
3217 // Insert the real instruction to BB.
3218 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
3219 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
3220
3221 // Insert branch.
3222 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
3223 BB->addSuccessor(FalseBB);
3224 BB->addSuccessor(TrueBB);
3225
3226 // FalseBB.
3227 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3228 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
3229 .addReg(LoongArch::R0)
3230 .addImm(0);
3231 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
3232 FalseBB->addSuccessor(SinkBB);
3233
3234 // TrueBB.
3235 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3236 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
3237 .addReg(LoongArch::R0)
3238 .addImm(1);
3239 TrueBB->addSuccessor(SinkBB);
3240
3241 // SinkBB: merge the results.
3242 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
3243 MI.getOperand(0).getReg())
3244 .addReg(RD1)
3245 .addMBB(FalseBB)
3246 .addReg(RD2)
3247 .addMBB(TrueBB);
3248
3249 // The pseudo instruction is gone now.
3250 MI.eraseFromParent();
3251 return SinkBB;
3252}
3253
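// Expand PseudoXVINSGR2VR_{B,H}: LASX has no byte/halfword element insert, so
// the 256-bit vector is handled 128 bits at a time. If the index is in the
// high half, XVPERMI_Q first moves that half down; the element is inserted
// with the LSX VINSGR2VR_{B,H} on the sub_128 subregister, and the result is
// merged back with SUBREG_TO_REG (plus another XVPERMI_Q for the high half).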
3254static MachineBasicBlock *
3255 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
3256 const LoongArchSubtarget &Subtarget) {
3257 unsigned InsOp;
3258 unsigned HalfSize;
3259 switch (MI.getOpcode()) {
3260 default:
3261 llvm_unreachable("Unexpected opcode");
3262 case LoongArch::PseudoXVINSGR2VR_B:
3263 HalfSize = 16;
3264 InsOp = LoongArch::VINSGR2VR_B;
3265 break;
3266 case LoongArch::PseudoXVINSGR2VR_H:
3267 HalfSize = 8;
3268 InsOp = LoongArch::VINSGR2VR_H;
3269 break;
3270 }
3271 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3272 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
3273 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
3274 DebugLoc DL = MI.getDebugLoc();
3275 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3276 // XDst = vector_insert XSrc, Elt, Idx
3277 Register XDst = MI.getOperand(0).getReg();
3278 Register XSrc = MI.getOperand(1).getReg();
3279 Register Elt = MI.getOperand(2).getReg();
3280 unsigned Idx = MI.getOperand(3).getImm();
3281
3282 Register ScratchReg1 = XSrc;
3283 if (Idx >= HalfSize) {
3284 ScratchReg1 = MRI.createVirtualRegister(RC);
3285 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
3286 .addReg(XSrc)
3287 .addReg(XSrc)
3288 .addImm(1);
3289 }
3290
3291 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
3292 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
3293 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
3294 .addReg(ScratchReg1, 0, LoongArch::sub_128);
3295 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
3296 .addReg(ScratchSubReg1)
3297 .addReg(Elt)
3298 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
3299
3300 Register ScratchReg2 = XDst;
3301 if (Idx >= HalfSize)
3302 ScratchReg2 = MRI.createVirtualRegister(RC);
3303
3304 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
3305 .addImm(0)
3306 .addReg(ScratchSubReg2)
3307 .addImm(LoongArch::sub_128);
3308
3309 if (Idx >= HalfSize)
3310 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
3311 .addReg(XSrc)
3312 .addReg(ScratchReg2)
3313 .addImm(2);
3314
3315 MI.eraseFromParent();
3316 return BB;
3317}
3318
3319MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
3320 MachineInstr &MI, MachineBasicBlock *BB) const {
3321 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3322 DebugLoc DL = MI.getDebugLoc();
3323
3324 switch (MI.getOpcode()) {
3325 default:
3326 llvm_unreachable("Unexpected instr type to insert");
3327 case LoongArch::DIV_W:
3328 case LoongArch::DIV_WU:
3329 case LoongArch::MOD_W:
3330 case LoongArch::MOD_WU:
3331 case LoongArch::DIV_D:
3332 case LoongArch::DIV_DU:
3333 case LoongArch::MOD_D:
3334 case LoongArch::MOD_DU:
3335 return insertDivByZeroTrap(MI, BB);
3336 break;
3337 case LoongArch::WRFCSR: {
3338 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
3339 LoongArch::FCSR0 + MI.getOperand(0).getImm())
3340 .addReg(MI.getOperand(1).getReg());
3341 MI.eraseFromParent();
3342 return BB;
3343 }
3344 case LoongArch::RDFCSR: {
3345 MachineInstr *ReadFCSR =
3346 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
3347 MI.getOperand(0).getReg())
3348 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
3349 ReadFCSR->getOperand(1).setIsUndef();
3350 MI.eraseFromParent();
3351 return BB;
3352 }
3353 case LoongArch::PseudoVBZ:
3354 case LoongArch::PseudoVBZ_B:
3355 case LoongArch::PseudoVBZ_H:
3356 case LoongArch::PseudoVBZ_W:
3357 case LoongArch::PseudoVBZ_D:
3358 case LoongArch::PseudoVBNZ:
3359 case LoongArch::PseudoVBNZ_B:
3360 case LoongArch::PseudoVBNZ_H:
3361 case LoongArch::PseudoVBNZ_W:
3362 case LoongArch::PseudoVBNZ_D:
3363 case LoongArch::PseudoXVBZ:
3364 case LoongArch::PseudoXVBZ_B:
3365 case LoongArch::PseudoXVBZ_H:
3366 case LoongArch::PseudoXVBZ_W:
3367 case LoongArch::PseudoXVBZ_D:
3368 case LoongArch::PseudoXVBNZ:
3369 case LoongArch::PseudoXVBNZ_B:
3370 case LoongArch::PseudoXVBNZ_H:
3371 case LoongArch::PseudoXVBNZ_W:
3372 case LoongArch::PseudoXVBNZ_D:
3373 return emitVecCondBranchPseudo(MI, BB, Subtarget);
3374 case LoongArch::PseudoXVINSGR2VR_B:
3375 case LoongArch::PseudoXVINSGR2VR_H:
3376 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
3377 }
3378}
3379
3380 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
3381 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3382 unsigned *Fast) const {
3383 if (!Subtarget.hasUAL())
3384 return false;
3385
3386 // TODO: set reasonable speed number.
3387 if (Fast)
3388 *Fast = 1;
3389 return true;
3390}
3391
3392const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
3393 switch ((LoongArchISD::NodeType)Opcode) {
3394 case LoongArchISD::FIRST_NUMBER:
3395 break;
3396
3397#define NODE_NAME_CASE(node) \
3398 case LoongArchISD::node: \
3399 return "LoongArchISD::" #node;
3400
3401 // TODO: Add more target-dependent nodes later.
3402 NODE_NAME_CASE(CALL)
3403 NODE_NAME_CASE(CALL_MEDIUM)
3404 NODE_NAME_CASE(CALL_LARGE)
3405 NODE_NAME_CASE(RET)
3406 NODE_NAME_CASE(TAIL)
3407 NODE_NAME_CASE(TAIL_MEDIUM)
3408 NODE_NAME_CASE(TAIL_LARGE)
3409 NODE_NAME_CASE(SLL_W)
3410 NODE_NAME_CASE(SRA_W)
3411 NODE_NAME_CASE(SRL_W)
3412 NODE_NAME_CASE(BSTRINS)
3413 NODE_NAME_CASE(BSTRPICK)
3414 NODE_NAME_CASE(MOVGR2FR_W_LA64)
3415 NODE_NAME_CASE(MOVFR2GR_S_LA64)
3416 NODE_NAME_CASE(FTINT)
3417 NODE_NAME_CASE(REVB_2H)
3418 NODE_NAME_CASE(REVB_2W)
3419 NODE_NAME_CASE(BITREV_4B)
3420 NODE_NAME_CASE(BITREV_W)
3421 NODE_NAME_CASE(ROTR_W)
3422 NODE_NAME_CASE(ROTL_W)
3423 NODE_NAME_CASE(CLZ_W)
3424 NODE_NAME_CASE(CTZ_W)
3425 NODE_NAME_CASE(DBAR)
3426 NODE_NAME_CASE(IBAR)
3427 NODE_NAME_CASE(BREAK)
3428 NODE_NAME_CASE(SYSCALL)
3429 NODE_NAME_CASE(CRC_W_B_W)
3430 NODE_NAME_CASE(CRC_W_H_W)
3431 NODE_NAME_CASE(CRC_W_W_W)
3432 NODE_NAME_CASE(CRC_W_D_W)
3433 NODE_NAME_CASE(CRCC_W_B_W)
3434 NODE_NAME_CASE(CRCC_W_H_W)
3435 NODE_NAME_CASE(CRCC_W_W_W)
3436 NODE_NAME_CASE(CRCC_W_D_W)
3437 NODE_NAME_CASE(CSRRD)
3438 NODE_NAME_CASE(CSRWR)
3439 NODE_NAME_CASE(CSRXCHG)
3440 NODE_NAME_CASE(IOCSRRD_B)
3441 NODE_NAME_CASE(IOCSRRD_H)
3442 NODE_NAME_CASE(IOCSRRD_W)
3443 NODE_NAME_CASE(IOCSRRD_D)
3444 NODE_NAME_CASE(IOCSRWR_B)
3445 NODE_NAME_CASE(IOCSRWR_H)
3446 NODE_NAME_CASE(IOCSRWR_W)
3447 NODE_NAME_CASE(IOCSRWR_D)
3448 NODE_NAME_CASE(CPUCFG)
3449 NODE_NAME_CASE(MOVGR2FCSR)
3450 NODE_NAME_CASE(MOVFCSR2GR)
3451 NODE_NAME_CASE(CACOP_D)
3452 NODE_NAME_CASE(CACOP_W)
3453 NODE_NAME_CASE(VPICK_SEXT_ELT)
3454 NODE_NAME_CASE(VPICK_ZEXT_ELT)
3455 NODE_NAME_CASE(VREPLVE)
3456 NODE_NAME_CASE(VALL_ZERO)
3457 NODE_NAME_CASE(VANY_ZERO)
3458 NODE_NAME_CASE(VALL_NONZERO)
3459 NODE_NAME_CASE(VANY_NONZERO)
3460 }
3461#undef NODE_NAME_CASE
3462 return nullptr;
3463}
3464
3465//===----------------------------------------------------------------------===//
3466// Calling Convention Implementation
3467//===----------------------------------------------------------------------===//
3468
3469// Eight general-purpose registers a0-a7 used for passing integer arguments,
3470// with a0-a1 reused to return values. Generally, the GPRs are used to pass
3471// fixed-point arguments, and floating-point arguments when no FPR is available
3472// or with soft float ABI.
3473const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
3474 LoongArch::R7, LoongArch::R8, LoongArch::R9,
3475 LoongArch::R10, LoongArch::R11};
3476// Eight floating-point registers fa0-fa7 used for passing floating-point
3477// arguments, and fa0-fa1 are also used to return values.
3478const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
3479 LoongArch::F3, LoongArch::F4, LoongArch::F5,
3480 LoongArch::F6, LoongArch::F7};
3481// FPR32 and FPR64 alias each other.
3482 const MCPhysReg ArgFPR64s[] = {
3483 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
3484 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
3485
3486const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
3487 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
3488 LoongArch::VR6, LoongArch::VR7};
3489
3490const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
3491 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
3492 LoongArch::XR6, LoongArch::XR7};
3493
3494// Pass a 2*GRLen argument that has been split into two GRLen values through
3495// registers or the stack as necessary.
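// For example, on LA64 an i128 argument is split into two i64 halves: if a
// GPR is free, the first half takes it and the second half takes the next
// GPR or spills to the stack; if no GPR is free, both halves go on the
// stack, the first one at the argument's original alignment.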
3496static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
3497 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
3498 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
3499 ISD::ArgFlagsTy ArgFlags2) {
3500 unsigned GRLenInBytes = GRLen / 8;
3501 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3502 // At least one half can be passed via register.
3503 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3504 VA1.getLocVT(), CCValAssign::Full));
3505 } else {
3506 // Both halves must be passed on the stack, with proper alignment.
3507 Align StackAlign =
3508 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3509 State.addLoc(
3510 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3511 State.AllocateStack(GRLenInBytes, StackAlign),
3512 VA1.getLocVT(), CCValAssign::Full));
3513 State.addLoc(CCValAssign::getMem(
3514 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3515 LocVT2, CCValAssign::Full));
3516 return false;
3517 }
3518 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3519 // The second half can also be passed via register.
3520 State.addLoc(
3521 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3522 } else {
3523 // The second half is passed via the stack, without additional alignment.
3524 State.addLoc(CCValAssign::getMem(
3525 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3526 LocVT2, CCValAssign::Full));
3527 }
3528 return false;
3529}
3530
3531// Implements the LoongArch calling convention. Returns true upon failure.
3532 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
3533 unsigned ValNo, MVT ValVT,
3534 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
3535 CCState &State, bool IsFixed, bool IsRet,
3536 Type *OrigTy) {
3537 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
3538 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
3539 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
3540 MVT LocVT = ValVT;
3541
3542 // Any return value split into more than two values can't be returned
3543 // directly.
3544 if (IsRet && ValNo > 1)
3545 return true;
3546
3547 // If passing a variadic argument, or if no FPR is available.
3548 bool UseGPRForFloat = true;
3549
3550 switch (ABI) {
3551 default:
3552 llvm_unreachable("Unexpected ABI");
3553 case LoongArchABI::ABI_ILP32S:
3554 case LoongArchABI::ABI_ILP32F:
3555 case LoongArchABI::ABI_LP64F:
3556 report_fatal_error("Unimplemented ABI");
3557 break;
3558 case LoongArchABI::ABI_ILP32D:
3559 case LoongArchABI::ABI_LP64D:
3560 UseGPRForFloat = !IsFixed;
3561 break;
3562 case LoongArchABI::ABI_LP64S:
3563 break;
3564 }
3565
3566 // FPR32 and FPR64 alias each other.
3567 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
3568 UseGPRForFloat = true;
3569
3570 if (UseGPRForFloat && ValVT == MVT::f32) {
3571 LocVT = GRLenVT;
3572 LocInfo = CCValAssign::BCvt;
3573 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
3574 LocVT = MVT::i64;
3575 LocInfo = CCValAssign::BCvt;
3576 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
3577 // TODO: Handle passing f64 on LA32 with D feature.
3578 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
3579 }
3580
3581 // If this is a variadic argument, the LoongArch calling convention requires
3582 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
3583 // byte alignment. An aligned register should be used regardless of whether
3584 // the original argument was split during legalisation or not. The argument
3585 // will not be passed by registers if the original type is larger than
3586 // 2*GRLen, so the register alignment rule does not apply.
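// For example, on LA64 a 16-byte-aligned 2*GRLen variadic argument arriving
// when a5 (an odd-numbered slot) is the next free GPR skips a5, so that its
// two halves land in the aligned a6/a7 pair.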
3587 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
3588 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
3589 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
3590 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3591 // Skip 'odd' register if necessary.
3592 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
3593 State.AllocateReg(ArgGPRs);
3594 }
3595
3596 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3597 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3598 State.getPendingArgFlags();
3599
3600 assert(PendingLocs.size() == PendingArgFlags.size() &&
3601 "PendingLocs and PendingArgFlags out of sync");
3602
3603 // Split arguments might be passed indirectly, so keep track of the pending
3604 // values.
3605 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
3606 LocVT = GRLenVT;
3607 LocInfo = CCValAssign::Indirect;
3608 PendingLocs.push_back(
3609 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3610 PendingArgFlags.push_back(ArgFlags);
3611 if (!ArgFlags.isSplitEnd()) {
3612 return false;
3613 }
3614 }
3615
3616 // If the split argument only had two elements, it should be passed directly
3617 // in registers or on the stack.
3618 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
3619 PendingLocs.size() <= 2) {
3620 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3621 // Apply the normal calling convention rules to the first half of the
3622 // split argument.
3623 CCValAssign VA = PendingLocs[0];
3624 ISD::ArgFlagsTy AF = PendingArgFlags[0];
3625 PendingLocs.clear();
3626 PendingArgFlags.clear();
3627 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
3628 ArgFlags);
3629 }
3630
3631 // Allocate to a register if possible, or else a stack slot.
3632 Register Reg;
3633 unsigned StoreSizeBytes = GRLen / 8;
3634 Align StackAlign = Align(GRLen / 8);
3635
3636 if (ValVT == MVT::f32 && !UseGPRForFloat)
3637 Reg = State.AllocateReg(ArgFPR32s);
3638 else if (ValVT == MVT::f64 && !UseGPRForFloat)
3639 Reg = State.AllocateReg(ArgFPR64s);
3640 else if (ValVT.is128BitVector())
3641 Reg = State.AllocateReg(ArgVRs);
3642 else if (ValVT.is256BitVector())
3643 Reg = State.AllocateReg(ArgXRs);
3644 else
3645 Reg = State.AllocateReg(ArgGPRs);
3646
3647 unsigned StackOffset =
3648 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
3649
3650 // If we reach this point and PendingLocs is non-empty, we must be at the
3651 // end of a split argument that must be passed indirectly.
3652 if (!PendingLocs.empty()) {
3653 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3654 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3655 for (auto &It : PendingLocs) {
3656 if (Reg)
3657 It.convertToReg(Reg);
3658 else
3659 It.convertToMem(StackOffset);
3660 State.addLoc(It);
3661 }
3662 PendingLocs.clear();
3663 PendingArgFlags.clear();
3664 return false;
3665 }
3666 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
3667 "Expected an GRLenVT at this stage");
3668
3669 if (Reg) {
3670 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3671 return false;
3672 }
3673
3674 // When a floating-point value is passed on the stack, no bit-cast is needed.
3675 if (ValVT.isFloatingPoint()) {
3676 LocVT = ValVT;
3677 LocInfo = CCValAssign::Full;
3678 }
3679
3680 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3681 return false;
3682}
3683
3684void LoongArchTargetLowering::analyzeInputArgs(
3685 MachineFunction &MF, CCState &CCInfo,
3686 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
3687 LoongArchCCAssignFn Fn) const {
3688 FunctionType *FType = MF.getFunction().getFunctionType();
3689 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3690 MVT ArgVT = Ins[i].VT;
3691 Type *ArgTy = nullptr;
3692 if (IsRet)
3693 ArgTy = FType->getReturnType();
3694 else if (Ins[i].isOrigArg())
3695 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3696 LoongArchABI::ABI ABI =
3697 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3698 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
3699 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
3700 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
3701 << '\n');
3702 llvm_unreachable("");
3703 }
3704 }
3705}
3706
3707void LoongArchTargetLowering::analyzeOutputArgs(
3708 MachineFunction &MF, CCState &CCInfo,
3709 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3710 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
3711 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3712 MVT ArgVT = Outs[i].VT;
3713 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3714 LoongArchABI::ABI ABI =
3715 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3716 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
3717 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
3718 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
3719 << "\n");
3720 llvm_unreachable("");
3721 }
3722 }
3723}
3724
3725// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3726// values.
3727 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3728 const CCValAssign &VA, const SDLoc &DL) {
3729 switch (VA.getLocInfo()) {
3730 default:
3731 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3732 case CCValAssign::Full:
3733 case CCValAssign::Indirect:
3734 break;
3735 case CCValAssign::BCvt:
3736 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3737 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
3738 else
3739 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3740 break;
3741 }
3742 return Val;
3743}
3744
3745 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3746 const CCValAssign &VA, const SDLoc &DL,
3747 const LoongArchTargetLowering &TLI) {
3748 MachineFunction &MF = DAG.getMachineFunction();
3749 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3750 EVT LocVT = VA.getLocVT();
3751 SDValue Val;
3752 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3753 Register VReg = RegInfo.createVirtualRegister(RC);
3754 RegInfo.addLiveIn(VA.getLocReg(), VReg);
3755 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3756
3757 return convertLocVTToValVT(DAG, Val, VA, DL);
3758}
3759
3760// The caller is responsible for loading the full value if the argument is
3761// passed with CCValAssign::Indirect.
3762 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3763 const CCValAssign &VA, const SDLoc &DL) {
3764 MachineFunction &MF = DAG.getMachineFunction();
3765 MachineFrameInfo &MFI = MF.getFrameInfo();
3766 EVT ValVT = VA.getValVT();
3767 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
3768 /*IsImmutable=*/true);
3769 SDValue FIN = DAG.getFrameIndex(
3770 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
3771
3772 ISD::LoadExtType ExtType;
3773 switch (VA.getLocInfo()) {
3774 default:
3775 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3776 case CCValAssign::Full:
3777 case CCValAssign::Indirect:
3778 case CCValAssign::BCvt:
3779 ExtType = ISD::NON_EXTLOAD;
3780 break;
3781 }
3782 return DAG.getExtLoad(
3783 ExtType, DL, VA.getLocVT(), Chain, FIN,
3784 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3785}
3786
3787 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3788 const CCValAssign &VA, const SDLoc &DL) {
3789 EVT LocVT = VA.getLocVT();
3790
3791 switch (VA.getLocInfo()) {
3792 default:
3793 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3794 case CCValAssign::Full:
3795 break;
3796 case CCValAssign::BCvt:
3797 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3798 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
3799 else
3800 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3801 break;
3802 }
3803 return Val;
3804}
3805
3806static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3807 CCValAssign::LocInfo LocInfo,
3808 ISD::ArgFlagsTy ArgFlags, CCState &State) {
3809 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3810 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
3811 // s0 s1 s2 s3 s4 s5 s6 s7 s8
3812 static const MCPhysReg GPRList[] = {
3813 LoongArch::R23, LoongArch::R24, LoongArch::R25,
3814 LoongArch::R26, LoongArch::R27, LoongArch::R28,
3815 LoongArch::R29, LoongArch::R30, LoongArch::R31};
3816 if (unsigned Reg = State.AllocateReg(GPRList)) {
3817 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3818 return false;
3819 }
3820 }
3821
3822 if (LocVT == MVT::f32) {
3823 // Pass in STG registers: F1, F2, F3, F4
3824 // fs0,fs1,fs2,fs3
3825 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
3826 LoongArch::F26, LoongArch::F27};
3827 if (unsigned Reg = State.AllocateReg(FPR32List)) {
3828 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3829 return false;
3830 }
3831 }
3832
3833 if (LocVT == MVT::f64) {
3834 // Pass in STG registers: D1, D2, D3, D4
3835 // fs4,fs5,fs6,fs7
3836 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
3837 LoongArch::F30_64, LoongArch::F31_64};
3838 if (unsigned Reg = State.AllocateReg(FPR64List)) {
3839 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3840 return false;
3841 }
3842 }
3843
3844 report_fatal_error("No registers left in GHC calling convention");
3845 return true;
3846}
3847
3848// Transform physical registers into virtual registers.
3849 SDValue LoongArchTargetLowering::LowerFormalArguments(
3850 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3851 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3852 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3853
3854 MachineFunction &MF = DAG.getMachineFunction();
3855
3856 switch (CallConv) {
3857 default:
3858 llvm_unreachable("Unsupported calling convention");
3859 case CallingConv::C:
3860 case CallingConv::Fast:
3861 break;
3862 case CallingConv::GHC:
3863 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
3864 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
3866 "GHC calling convention requires the F and D extensions");
3867 }
3868
3869 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3870 MVT GRLenVT = Subtarget.getGRLenVT();
3871 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
3872 // Used with varargs to accumulate store chains.
3873 std::vector<SDValue> OutChains;
3874
3875 // Assign locations to all of the incoming arguments.
3876 SmallVector<CCValAssign> ArgLocs;
3877 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3878
3879 if (CallConv == CallingConv::GHC)
3880 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
3881 else
3882 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
3883
3884 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3885 CCValAssign &VA = ArgLocs[i];
3886 SDValue ArgValue;
3887 if (VA.isRegLoc())
3888 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3889 else
3890 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3891 if (VA.getLocInfo() == CCValAssign::Indirect) {
3892 // If the original argument was split and passed by reference, we need to
3893 // load all parts of it here (using the same address).
3894 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3895 MachinePointerInfo()));
3896 unsigned ArgIndex = Ins[i].OrigArgIndex;
3897 unsigned ArgPartOffset = Ins[i].PartOffset;
3898 assert(ArgPartOffset == 0);
3899 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3900 CCValAssign &PartVA = ArgLocs[i + 1];
3901 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
3902 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
3903 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
3904 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3905 MachinePointerInfo()));
3906 ++i;
3907 }
3908 continue;
3909 }
3910 InVals.push_back(ArgValue);
3911 }
3912
3913 if (IsVarArg) {
3914 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
3915 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3916 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
3917 MachineFrameInfo &MFI = MF.getFrameInfo();
3918 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3919 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
3920
3921 // Offset of the first variable argument from stack pointer, and size of
3922 // the vararg save area. For now, the varargs save area is either zero or
3923 // large enough to hold a0-a7.
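// For example, on LA64 with five fixed arguments passed in a0-a4, the three
// remaining registers a5-a7 are spilled, giving VarArgsSaveSize = 24 and
// VaArgOffset = -24.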
3924 int VaArgOffset, VarArgsSaveSize;
3925
3926 // If all registers are allocated, then all varargs must be passed on the
3927 // stack and we don't need to save any argregs.
3928 if (ArgRegs.size() == Idx) {
3929 VaArgOffset = CCInfo.getStackSize();
3930 VarArgsSaveSize = 0;
3931 } else {
3932 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
3933 VaArgOffset = -VarArgsSaveSize;
3934 }
3935
3936 // Record the frame index of the first variable argument
3937 // which is a value necessary to VASTART.
3938 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3939 LoongArchFI->setVarArgsFrameIndex(FI);
3940
3941 // If saving an odd number of registers then create an extra stack slot to
3942 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
3943 // offsets to even-numbered registers remain 2*GRLen-aligned.
3944 if (Idx % 2) {
3945 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
3946 true);
3947 VarArgsSaveSize += GRLenInBytes;
3948 }
3949
3950 // Copy the integer registers that may have been used for passing varargs
3951 // to the vararg save area.
3952 for (unsigned I = Idx; I < ArgRegs.size();
3953 ++I, VaArgOffset += GRLenInBytes) {
3954 const Register Reg = RegInfo.createVirtualRegister(RC);
3955 RegInfo.addLiveIn(ArgRegs[I], Reg);
3956 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
3957 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3958 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3959 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3961 cast<StoreSDNode>(Store.getNode())
3962 ->getMemOperand()
3963 ->setValue((Value *)nullptr);
3964 OutChains.push_back(Store);
3965 }
3966 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
3967 }
3968
3969 // All stores are grouped in one node to allow the matching between
3970 // the size of Ins and InVals. This only happens for vararg functions.
3971 if (!OutChains.empty()) {
3972 OutChains.push_back(Chain);
3973 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3974 }
3975
3976 return Chain;
3977}
3978
3979 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3980 return CI->isTailCall();
3981}
3982
3983// Check if the return value is used as only a return value, as otherwise
3984// we can't perform a tail-call.
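// Concretely, the value must feed exactly one ISD::CopyToReg that has no
// glue operand, and every user of that copy must be a LoongArchISD::RET.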
3986 SDValue &Chain) const {
3987 if (N->getNumValues() != 1)
3988 return false;
3989 if (!N->hasNUsesOfValue(1, 0))
3990 return false;
3991
3992 SDNode *Copy = *N->use_begin();
3993 if (Copy->getOpcode() != ISD::CopyToReg)
3994 return false;
3995
3996 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
3997 // isn't safe to perform a tail call.
3998 if (Copy->getGluedNode())
3999 return false;
4000
4001 // The copy must be used by a LoongArchISD::RET, and nothing else.
4002 bool HasRet = false;
4003 for (SDNode *Node : Copy->uses()) {
4004 if (Node->getOpcode() != LoongArchISD::RET)
4005 return false;
4006 HasRet = true;
4007 }
4008
4009 if (!HasRet)
4010 return false;
4011
4012 Chain = Copy->getOperand(0);
4013 return true;
4014}
4015
4016// Check whether the call is eligible for tail call optimization.
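// A call qualifies only if no argument is passed on the stack or indirectly,
// neither caller nor callee uses sret or byval arguments, and the callee's
// calling convention preserves at least the registers the caller's does.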
4017bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
4018 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
4019 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
4020
4021 auto CalleeCC = CLI.CallConv;
4022 auto &Outs = CLI.Outs;
4023 auto &Caller = MF.getFunction();
4024 auto CallerCC = Caller.getCallingConv();
4025
4026 // Do not tail call opt if the stack is used to pass parameters.
4027 if (CCInfo.getStackSize() != 0)
4028 return false;
4029
4030 // Do not tail call opt if any parameters need to be passed indirectly.
4031 for (auto &VA : ArgLocs)
4032 if (VA.getLocInfo() == CCValAssign::Indirect)
4033 return false;
4034
4035 // Do not tail call opt if either caller or callee uses struct return
4036 // semantics.
4037 auto IsCallerStructRet = Caller.hasStructRetAttr();
4038 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
4039 if (IsCallerStructRet || IsCalleeStructRet)
4040 return false;
4041
4042 // Do not tail call opt if either the callee or caller has a byval argument.
4043 for (auto &Arg : Outs)
4044 if (Arg.Flags.isByVal())
4045 return false;
4046
4047 // The callee has to preserve all registers the caller needs to preserve.
4048 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
4049 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4050 if (CalleeCC != CallerCC) {
4051 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4052 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4053 return false;
4054 }
4055 return true;
4056}
4057
4058 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
4059 return DAG.getDataLayout().getPrefTypeAlign(
4060 VT.getTypeForEVT(*DAG.getContext()));
4061}
4062
4063// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
4064// and output parameter nodes.
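// The lowering proceeds in stages: analyze operand locations, decide whether
// the call may be emitted as a tail call, make local copies of byval
// arguments, open the call sequence, store stack-passed and indirectly
// passed arguments, glue the register copies together, and rewrite the
// callee into a target global address or external symbol so that PseudoCALL
// can match it.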
4065SDValue
4066 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
4067 SmallVectorImpl<SDValue> &InVals) const {
4068 SelectionDAG &DAG = CLI.DAG;
4069 SDLoc &DL = CLI.DL;
4070 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4071 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4072 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4073 SDValue Chain = CLI.Chain;
4074 SDValue Callee = CLI.Callee;
4075 CallingConv::ID CallConv = CLI.CallConv;
4076 bool IsVarArg = CLI.IsVarArg;
4077 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4078 MVT GRLenVT = Subtarget.getGRLenVT();
4079 bool &IsTailCall = CLI.IsTailCall;
4080
4081 MachineFunction &MF = DAG.getMachineFunction();
4082
4083 // Analyze the operands of the call, assigning locations to each operand.
4084 SmallVector<CCValAssign> ArgLocs;
4085 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4086
4087 if (CallConv == CallingConv::GHC)
4088 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
4089 else
4090 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
4091
4092 // Check if it's really possible to do a tail call.
4093 if (IsTailCall)
4094 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
4095
4096 if (IsTailCall)
4097 ++NumTailCalls;
4098 else if (CLI.CB && CLI.CB->isMustTailCall())
4099 report_fatal_error("failed to perform tail call elimination on a call "
4100 "site marked musttail");
4101
4102 // Get a count of how many bytes are to be pushed on the stack.
4103 unsigned NumBytes = ArgCCInfo.getStackSize();
4104
4105 // Create local copies for byval args.
4106 SmallVector<SDValue> ByValArgs;
4107 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4108 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4109 if (!Flags.isByVal())
4110 continue;
4111
4112 SDValue Arg = OutVals[i];
4113 unsigned Size = Flags.getByValSize();
4114 Align Alignment = Flags.getNonZeroByValAlign();
4115
4116 int FI =
4117 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
4118 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4119 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
4120
4121 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
4122 /*IsVolatile=*/false,
4123 /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
4124 MachinePointerInfo(), MachinePointerInfo());
4125 ByValArgs.push_back(FIPtr);
4126 }
4127
4128 if (!IsTailCall)
4129 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
4130
4131 // Copy argument values to their designated locations.
4132 SmallVector<std::pair<Register, SDValue>> RegsToPass;
4133 SmallVector<SDValue> MemOpChains;
4134 SDValue StackPtr;
4135 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4136 CCValAssign &VA = ArgLocs[i];
4137 SDValue ArgValue = OutVals[i];
4138 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4139
4140 // Promote the value if needed.
4141 // For now, only handle fully promoted and indirect arguments.
4142 if (VA.getLocInfo() == CCValAssign::Indirect) {
4143 // Store the argument in a stack slot and pass its address.
4144 Align StackAlign =
4145 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
4146 getPrefTypeAlign(ArgValue.getValueType(), DAG));
4147 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
4148 // If the original argument was split and passed by reference, we need to
4149 // store the required parts of it here (and pass just one address).
4150 unsigned ArgIndex = Outs[i].OrigArgIndex;
4151 unsigned ArgPartOffset = Outs[i].PartOffset;
4152 assert(ArgPartOffset == 0);
4153 // Calculate the total size to store. We don't have access to what we're
4154 // actually storing other than performing the loop and collecting the
4155 // info.
4156 SmallVector<std::pair<SDValue, SDValue>> Parts;
4157 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
4158 SDValue PartValue = OutVals[i + 1];
4159 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
4160 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
4161 EVT PartVT = PartValue.getValueType();
4162
4163 StoredSize += PartVT.getStoreSize();
4164 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
4165 Parts.push_back(std::make_pair(PartValue, Offset));
4166 ++i;
4167 }
4168 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
4169 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4170 MemOpChains.push_back(
4171 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
4172 MachinePointerInfo::getFixedStack(MF, FI)));
4173 for (const auto &Part : Parts) {
4174 SDValue PartValue = Part.first;
4175 SDValue PartOffset = Part.second;
4176 SDValue Address =
4177 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
4178 MemOpChains.push_back(
4179 DAG.getStore(Chain, DL, PartValue, Address,
4180 MachinePointerInfo::getFixedStack(MF, FI)));
4181 }
4182 ArgValue = SpillSlot;
4183 } else {
4184 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
4185 }
4186
4187 // Use local copy if it is a byval arg.
4188 if (Flags.isByVal())
4189 ArgValue = ByValArgs[j++];
4190
4191 if (VA.isRegLoc()) {
4192 // Queue up the argument copies and emit them at the end.
4193 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
4194 } else {
4195 assert(VA.isMemLoc() && "Argument not register or memory");
4196 assert(!IsTailCall && "Tail call not allowed if stack is used "
4197 "for passing parameters");
4198
4199 // Work out the address of the stack slot.
4200 if (!StackPtr.getNode())
4201 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
4202 SDValue Address =
4203 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
4204 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
4205
4206 // Emit the store.
4207 MemOpChains.push_back(
4208 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
4209 }
4210 }
4211
4212 // Join the stores, which are independent of one another.
4213 if (!MemOpChains.empty())
4214 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4215
4216 SDValue Glue;
4217
4218 // Build a sequence of copy-to-reg nodes, chained and glued together.
4219 for (auto &Reg : RegsToPass) {
4220 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
4221 Glue = Chain.getValue(1);
4222 }
4223
4224 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
4225 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
4226 // split it and then direct call can be matched by PseudoCALL.
4227 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
4228 const GlobalValue *GV = S->getGlobal();
4229 unsigned OpFlags =
4230 getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)
4231 ? LoongArchII::MO_CALL
4232 : LoongArchII::MO_CALL_PLT;
4233 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
4234 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4235 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(
4236 *MF.getFunction().getParent(), nullptr)
4237 ? LoongArchII::MO_CALL
4238 : LoongArchII::MO_CALL_PLT;
4239 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
4240 }
4241
4242 // The first call operand is the chain and the second is the target address.
4243 SmallVector<SDValue> Ops;
4244 Ops.push_back(Chain);