//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchRegisterInfo.h"
#include "LoongArchSubtarget.h"
#include "LoongArchTargetMachine.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));
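// ZeroDivCheck is a hidden cl::opt, so it is toggled on the llc command line,
// e.g. `llc -mtriple=loongarch64 -loongarch-check-zero-division foo.ll`; when
// enabled, integer divisions are emitted with an explicit divisor check that
// traps on zero.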

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.
  setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.
  setOperationAction(ISD::BSWAP, MVT::i16, Custom);

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::ROTR, MVT::i32, Custom);
    setOperationAction(ISD::ROTL, MVT::i32, Custom);
    setOperationAction(ISD::CTTZ, MVT::i32, Custom);
    setOperationAction(ISD::CTLZ, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
  }

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f32, Legal);

    if (!Subtarget.hasBasicD()) {
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      if (Subtarget.is64Bit()) {
        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    if (Subtarget.is64Bit())
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setCondCodeAction({ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE,
                         ISD::SETUGT},
                        VT, Expand);
    }
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}, VT,
                         Legal);
      setOperationAction({ISD::FMA, ISD::FSQRT}, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
    }
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                         Legal);
      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
                         VT, Legal);
      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
      setCondCodeAction({ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE,
                         ISD::SETUGT},
                        VT, Expand);
    }
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
      setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}, VT,
                         Legal);
      setOperationAction({ISD::FMA, ISD::FSQRT}, VT, Legal);
      setOperationAction(ISD::FNEG, VT, Legal);
      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT},
                        VT, Expand);
    }
  }

  // Set DAG combine for LA32 and LA64.

  setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRL});

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX())
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(LoongArch::R3);

  setBooleanContents(ZeroOrOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  setMinCmpXchgSizeInBits(32);

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}
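
// For example, with this hook returning false, a DAG address such as
// (add (GlobalAddress @g), 8) keeps the explicit ADD instead of becoming
// (GlobalAddress @g + 8), so the (GlobalAddress @g) node can be CSE'd across
// accesses at different offsets.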

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  }
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  // TODO: custom shuffle.
  return SDValue();
}

static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;
508
510 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
511 // The resulting code is the same length as the expansion, but it doesn't
512 // use memory operations.
513 EVT ResTy = Node->getValueType(0);
514
515 assert(ResTy.isVector());
516
517 unsigned NumElts = ResTy.getVectorNumElements();
518 SDValue Vector = DAG.getUNDEF(ResTy);
519 for (unsigned i = 0; i < NumElts; ++i) {
521 Node->getOperand(i),
522 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
523 }
524 return Vector;
525 }

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VecTy = Op->getOperand(0)->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  EVT EltTy = VecTy.getVectorElementType();
  unsigned NumElts = VecTy.getVectorNumElements();

  if (isa<ConstantSDNode>(Idx) &&
      (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
       EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
    return Op;

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (isa<ConstantSDNode>(Op->getOperand(2)))
    return Op;
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // singlethread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));

  return Op;
}

SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                     SelectionDAG &DAG) const {

  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
    DAG.getContext()->emitError(
        "On LA64, only 64-bit registers can be written.");
    return Op.getOperand(0);
  }

  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
    DAG.getContext()->emitError(
        "On LA32, only 32-bit registers can be written.");
    return Op.getOperand(0);
  }

  return Op;
}

SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

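  // Walk up the saved-frame-pointer chain: each iteration loads the caller's
  // frame address, which is spilled at -2*GRLenInBytes relative to the current
  // frame pointer (below the return address slot in the standard LoongArch
  // frame layout).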
  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getConstant(Offset, DL, VT));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  // Currently only support lowering return address for current frame.
  if (Op.getConstantOperandVal(0) != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}

SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto Size = Subtarget.getGRLen() / 8;
  auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
  return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
}

SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if ((Op0.getOpcode() == ISD::AssertSext ||
       Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
                                              SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
      Subtarget.is64Bit() && Subtarget.hasBasicF()) {
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
  }
  return Op;
}

SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
                                                 SelectionDAG &DAG) const {

  SDLoc DL(Op);

  if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
      !Subtarget.hasBasicD()) {
    SDValue Dst =
        DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
    return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
  }

  EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
  SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
}

static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         CodeModel::Model M,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
  SDValue Load;

  switch (M) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);
    } else {
      // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
          0);
    }
    break;
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL sym), which expands to
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
    } else {
      // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
      // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
      Load =
          SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
    }
  }

  if (!IsLocal) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
  }

  return Load;
}

SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<BlockAddressSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
                                                SelectionDAG &DAG) const {
  return getAddr(cast<JumpTableSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  auto CM = DAG.getTarget().getCodeModel();
  const GlobalValue *GV = N->getGlobal();

  if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
    if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
      CM = *GCM;
  }

  return getAddr(N, DAG, CM, GV->isDSOLocal());
}

SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc, bool UseGOT,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Offset = Large
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
  if (UseGOT) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
  }

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}

SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
                                                SelectionDAG &DAG, unsigned Opc,
                                                bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
               : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
}

SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  if (DAG.getTarget().useEmulatedTLS())
    report_fatal_error("the emulated TLS is prohibited",
                       /*GenCrashDiag=*/false);

  bool IsDesc = DAG.getTarget().useTLSDESC();

  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
  case TLSModel::GeneralDynamic:
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                     : LoongArch::PseudoLA_TLS_GD,
                               Large);
    break;
  case TLSModel::LocalDynamic:
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                     : LoongArch::PseudoLA_TLS_LD,
                               Large);
    break;
  case TLSModel::InitialExec:
    // This model uses the GOT to resolve TLS offsets.
    return getStaticTLSAddr(N, DAG,
                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                  : LoongArch::PseudoLA_TLS_IE,
                            /*UseGOT=*/true, Large);
  case TLSModel::LocalExec:
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
                            /*UseGOT=*/false);
  }

  return getTLSDescAddr(N, DAG,
                        Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE
                              : LoongArch::PseudoLA_TLS_DESC_PC,
                        Large);
}

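// Checks that the immediate operand at index ImmOp of the intrinsic node Op
// fits in N bits (signed if IsSigned). On failure this emits a diagnostic and
// returns UNDEF so lowering can still produce a value; on success it returns
// an empty SDValue, meaning no custom lowering is needed for the range check.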
template <unsigned N>
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
                                    SelectionDAG &DAG, bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Op->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
  }
  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    return checkIntrinsicImmArg<1>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    return checkIntrinsicImmArg<2>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    return checkIntrinsicImmArg<2>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    return checkIntrinsicImmArg<3>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    return checkIntrinsicImmArg<3>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    return checkIntrinsicImmArg<4>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    return checkIntrinsicImmArg<4>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    return checkIntrinsicImmArg<5>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    return checkIntrinsicImmArg<5>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    return checkIntrinsicImmArg<6>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    return checkIntrinsicImmArg<6>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    return checkIntrinsicImmArg<7>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    return checkIntrinsicImmArg<8>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    return checkIntrinsicImmArg<8>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
  }
}

// Helper function that emits an error message for intrinsics with a chain and
// returns the merged values of an UNDEF and the chain.
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
                                                  StringRef ErrorMsg,
                                                  SelectionDAG &DAG) {
  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
                            SDLoc(Op));
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT VT = Op.getValueType();
  SDValue Chain = Op.getOperand(0);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (Op.getConstantOperandVal(1)) {
  default:
    return Op;
  case Intrinsic::loongarch_crc_w_b_w:
  case Intrinsic::loongarch_crc_w_h_w:
  case Intrinsic::loongarch_crc_w_w_w:
  case Intrinsic::loongarch_crc_w_d_w:
  case Intrinsic::loongarch_crcc_w_b_w:
  case Intrinsic::loongarch_crcc_w_h_w:
  case Intrinsic::loongarch_crcc_w_w_w:
  case Intrinsic::loongarch_crcc_w_d_w:
    return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
  case Intrinsic::loongarch_csrrd_w:
  case Intrinsic::loongarch_csrrd_d: {
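    // csrrd/csrwr/csrxchg encode the CSR address as a 14-bit unsigned
    // immediate, hence the isUInt<14> checks below.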
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrwr_w:
  case Intrinsic::loongarch_csrwr_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrxchg_w:
  case Intrinsic::loongarch_csrxchg_d: {
    unsigned Imm = Op.getConstantOperandVal(4);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2), Op.getOperand(3),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_iocsrrd_d: {
    return DAG.getNode(
        LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
  }
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
                       {Chain, Op.getOperand(2)});                             \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
  case Intrinsic::loongarch_cpucfg: {
    return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                       {Chain, Op.getOperand(2)});
  }
  case Intrinsic::loongarch_lddir_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<8>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : Op;
  }
  case Intrinsic::loongarch_movfcsr2gr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<2>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_lsx_vld:
  case Intrinsic::loongarch_lsx_vldrepl_b:
  case Intrinsic::loongarch_lasx_xvld:
  case Intrinsic::loongarch_lasx_xvldrepl_b:
    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_h:
  case Intrinsic::loongarch_lasx_xvldrepl_h:
    return !isShiftedInt<11, 1>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_w:
  case Intrinsic::loongarch_lasx_xvldrepl_w:
    return !isShiftedInt<10, 2>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_d:
  case Intrinsic::loongarch_lasx_xvldrepl_d:
    return !isShiftedInt<9, 3>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}

// Helper function that emits an error message for intrinsics with a void
// return value and returns the chain.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
                                         SelectionDAG &DAG) {
  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return Op.getOperand(0);
}

SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  SDValue Chain = Op.getOperand(0);
  uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
  SDValue Op2 = Op.getOperand(2);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqLA32 = "requires loongarch32";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (IntrinsicEnum) {
  default:
    // TODO: Add more Intrinsics.
    return SDValue();
  case Intrinsic::loongarch_cacop_d:
  case Intrinsic::loongarch_cacop_w: {
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
    // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
    unsigned Imm1 = Op2->getAsZExtVal();
    int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
    if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return Op;
  }
  case Intrinsic::loongarch_dbar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_ibar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_break: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_movgr2fcsr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<2>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT),
                             DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
                                         Op.getOperand(3)));
  }
  case Intrinsic::loongarch_syscall: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
#define IOCSRWR_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue Op3 = Op.getOperand(3);                                            \
    return Subtarget.is64Bit()                                                 \
               ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
               : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
                             Op3);                                             \
  }
    IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
    IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
    IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
#undef IOCSRWR_CASE
  case Intrinsic::loongarch_iocsrwr_d: {
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
               : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
                             Op2,
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                         Op.getOperand(3)));
  }
#define ASRT_LE_GT_CASE(NAME)                                                  \
  case Intrinsic::loongarch_##NAME: {                                          \
    return !Subtarget.is64Bit()                                                \
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
               : Op;                                                           \
  }
    ASRT_LE_GT_CASE(asrtle_d)
    ASRT_LE_GT_CASE(asrtgt_d)
#undef ASRT_LE_GT_CASE
  case Intrinsic::loongarch_ldpte_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
           : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
                             : Op;
  }
  case Intrinsic::loongarch_lsx_vst:
  case Intrinsic::loongarch_lasx_xvst:
    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_b:
    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<5>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_b:
    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<4>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_h:
    return (!isShiftedInt<8, 1>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<4>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_h:
    return (!isShiftedInt<8, 1>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<3>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_w:
    return (!isShiftedInt<8, 2>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<3>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_w:
    return (!isShiftedInt<8, 2>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<2>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_d:
    return (!isShiftedInt<8, 3>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<2>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_d:
    return (!isShiftedInt<8, 3>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<1>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}

SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-GRLen < 0: // Shamt < GRLen
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-GRLen)
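  //
  // Note that "GRLen-1 ^ Shamt" equals GRLen-1-Shamt for Shamt < GRLen, so
  // "(Lo >>u 1) >>u (GRLen-1 ^ Shamt)" computes Lo >>u (GRLen-Shamt) without
  // ever using an out-of-range shift amount when Shamt == 0.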

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
                                                      SelectionDAG &DAG,
                                                      bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-GRLen);
  //     Hi = Hi >>s (GRLen-1)
  //
  // SRL expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-GRLen);
  //     Hi = 0;
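  //
  // As in lowerShiftLeftParts, "ShAmt ^ GRLen-1" equals GRLen-1-ShAmt for
  // ShAmt < GRLen, so "(Hi << 1) << (ShAmt ^ GRLen-1)" computes
  // Hi << (GRLen-ShAmt) without an out-of-range shift amount.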

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::UDIV:
    return LoongArchISD::DIV_WU;
  case ISD::UREM:
    return LoongArchISD::MOD_WU;
  case ISD::SHL:
    return LoongArchISD::SLL_W;
  case ISD::SRA:
    return LoongArchISD::SRA_W;
  case ISD::SRL:
    return LoongArchISD::SRL_W;
  case ISD::ROTL:
  case ISD::ROTR:
    return LoongArchISD::ROTR_W;
  case ISD::CTTZ:
    return LoongArchISD::CTZ_W;
  case ISD::CTLZ:
    return LoongArchISD::CLZ_W;
  }
}

// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*_W variants later on, because the fact that the operation was
// originally of type i8/i16/i32 would be lost.
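// For example, an i32 ISD::SRA on LA64 is rewritten as
//   (i32 (trunc (i64 (SRA_W (any_ext LHS), (any_ext ShAmt)))))
// which selects directly to sra.w.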
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
  SDValue NewOp0, NewRes;

  switch (NumOp) {
  default:
    llvm_unreachable("Unexpected NumOp");
  case 1: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
    break;
  }
  case 2: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
    if (N->getOpcode() == ISD::ROTL) {
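      // There is no 32-bit rotate-left; implement ROTL as ROTR_W with the
      // amount rewritten to (32 - amount).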
      SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
      NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
    }
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
    break;
  }
  // TODO: Handle more NumOp.
  }

  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}

// Converts the given 32-bit operation to an i64 operation with sign-extension
// semantics, so that extra sign-extension instructions can be avoided.
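// For example, an i32 ISD::ADD on LA64 becomes
//   (i32 (trunc (sext_inreg (i64 (add (any_ext a), (any_ext b))), i32)))
// which matches add.w, whose 32-bit result is already sign-extended.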
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
  SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
  SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
                               DAG.getValueType(MVT::i32));
  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}

// Helper function that emits an error message for intrinsics with or without a
// chain, and replaces the results with a UNDEF and, optionally, the chain.
static void emitErrorAndReplaceIntrinsicResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
    StringRef ErrorMsg, bool WithChain = true) {
  DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
  Results.push_back(DAG.getUNDEF(N->getValueType(0)));
  if (!WithChain)
    return;
  Results.push_back(N->getOperand(0));
}
1784
1785template <unsigned N>
1786static void
1787 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
1788 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
1789 unsigned ResOp) {
1790 const StringRef ErrorMsgOOR = "argument out of range";
1791 unsigned Imm = Node->getConstantOperandVal(2);
1792 if (!isUInt<N>(Imm)) {
1793 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
1794 /*WithChain=*/false);
1795 return;
1796 }
1797 SDLoc DL(Node);
1798 SDValue Vec = Node->getOperand(1);
1799
1800 SDValue PickElt =
1801 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
1802 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
1803 DAG.getValueType(Vec.getValueType().getVectorElementType()));
1804 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
1805 PickElt.getValue(0)));
1806}
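// For instance, `i32 @llvm.loongarch.lsx.vpickve2gr.b(v16i8 %v, i32 3)` is
// replaced by a VPICK_SEXT_ELT of element 3 computed in GRLenVT and then
// truncated back to the intrinsic's i32 result type; an index that does not
// fit the template width (uimm4 here, for 16 elements) is diagnosed instead.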
1807
1808 static void replaceVecCondBranchResults(SDNode *N,
1809 SmallVectorImpl<SDValue> &Results,
1810 SelectionDAG &DAG,
1811 const LoongArchSubtarget &Subtarget,
1812 unsigned ResOp) {
1813 SDLoc DL(N);
1814 SDValue Vec = N->getOperand(1);
1815
1816 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
1817 Results.push_back(
1818 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
1819}
1820
1821static void
1822 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1823 SelectionDAG &DAG,
1824 const LoongArchSubtarget &Subtarget) {
1825 switch (N->getConstantOperandVal(0)) {
1826 default:
1827 llvm_unreachable("Unexpected Intrinsic.");
1828 case Intrinsic::loongarch_lsx_vpickve2gr_b:
1829 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1830 LoongArchISD::VPICK_SEXT_ELT);
1831 break;
1832 case Intrinsic::loongarch_lsx_vpickve2gr_h:
1833 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
1834 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1835 LoongArchISD::VPICK_SEXT_ELT);
1836 break;
1837 case Intrinsic::loongarch_lsx_vpickve2gr_w:
1838 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1839 LoongArchISD::VPICK_SEXT_ELT);
1840 break;
1841 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
1842 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1843 LoongArchISD::VPICK_ZEXT_ELT);
1844 break;
1845 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
1846 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
1847 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1848 LoongArchISD::VPICK_ZEXT_ELT);
1849 break;
1850 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
1851 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1852 LoongArchISD::VPICK_ZEXT_ELT);
1853 break;
1854 case Intrinsic::loongarch_lsx_bz_b:
1855 case Intrinsic::loongarch_lsx_bz_h:
1856 case Intrinsic::loongarch_lsx_bz_w:
1857 case Intrinsic::loongarch_lsx_bz_d:
1858 case Intrinsic::loongarch_lasx_xbz_b:
1859 case Intrinsic::loongarch_lasx_xbz_h:
1860 case Intrinsic::loongarch_lasx_xbz_w:
1861 case Intrinsic::loongarch_lasx_xbz_d:
1862 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1863 LoongArchISD::VANY_ZERO);
1864 break;
1865 case Intrinsic::loongarch_lsx_bz_v:
1866 case Intrinsic::loongarch_lasx_xbz_v:
1867 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1868 LoongArchISD::VALL_ZERO);
1869 break;
1870 case Intrinsic::loongarch_lsx_bnz_b:
1871 case Intrinsic::loongarch_lsx_bnz_h:
1872 case Intrinsic::loongarch_lsx_bnz_w:
1873 case Intrinsic::loongarch_lsx_bnz_d:
1874 case Intrinsic::loongarch_lasx_xbnz_b:
1875 case Intrinsic::loongarch_lasx_xbnz_h:
1876 case Intrinsic::loongarch_lasx_xbnz_w:
1877 case Intrinsic::loongarch_lasx_xbnz_d:
1878 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1879 LoongArchISD::VALL_NONZERO);
1880 break;
1881 case Intrinsic::loongarch_lsx_bnz_v:
1882 case Intrinsic::loongarch_lasx_xbnz_v:
1883 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1884 LoongArchISD::VANY_NONZERO);
1885 break;
1886 }
1887}
1888
1889 void LoongArchTargetLowering::ReplaceNodeResults(
1890 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1891 SDLoc DL(N);
1892 EVT VT = N->getValueType(0);
1893 switch (N->getOpcode()) {
1894 default:
1895 llvm_unreachable("Don't know how to legalize this operation");
1896 case ISD::ADD:
1897 case ISD::SUB:
1898 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1899 "Unexpected custom legalisation");
1900 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
1901 break;
1902 case ISD::UDIV:
1903 case ISD::UREM:
1904 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1905 "Unexpected custom legalisation");
1906 Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND));
1907 break;
1908 case ISD::SHL:
1909 case ISD::SRA:
1910 case ISD::SRL:
1911 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1912 "Unexpected custom legalisation");
1913 if (N->getOperand(1).getOpcode() != ISD::Constant) {
1914 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1915 break;
1916 }
1917 break;
1918 case ISD::ROTL:
1919 case ISD::ROTR:
1920 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1921 "Unexpected custom legalisation");
1922 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1923 break;
1924 case ISD::FP_TO_SINT: {
1925 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1926 "Unexpected custom legalisation");
1927 SDValue Src = N->getOperand(0);
1928 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1929 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1930 TargetLowering::TypeSoftenFloat) {
1931 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1932 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1933 return;
1934 }
1935 // If the FP type needs to be softened, emit a library call using the 'si'
1936 // version. If we left it to default legalization we'd end up with 'di'.
1937 RTLIB::Libcall LC;
1938 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1939 MakeLibCallOptions CallOptions;
1940 EVT OpVT = Src.getValueType();
1941 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1942 SDValue Chain = SDValue();
1943 SDValue Result;
1944 std::tie(Result, Chain) =
1945 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1946 Results.push_back(Result);
1947 break;
1948 }
1949 case ISD::BITCAST: {
1950 SDValue Src = N->getOperand(0);
1951 EVT SrcVT = Src.getValueType();
1952 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1953 Subtarget.hasBasicF()) {
1954 SDValue Dst =
1955 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1956 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1957 }
1958 break;
1959 }
1960 case ISD::FP_TO_UINT: {
1961 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1962 "Unexpected custom legalisation");
1963 auto &TLI = DAG.getTargetLoweringInfo();
1964 SDValue Tmp1, Tmp2;
1965 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1966 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1967 break;
1968 }
1969 case ISD::BSWAP: {
1970 SDValue Src = N->getOperand(0);
1971 assert((VT == MVT::i16 || VT == MVT::i32) &&
1972 "Unexpected custom legalization");
1973 MVT GRLenVT = Subtarget.getGRLenVT();
1974 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1975 SDValue Tmp;
1976 switch (VT.getSizeInBits()) {
1977 default:
1978 llvm_unreachable("Unexpected operand width");
1979 case 16:
1980 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1981 break;
1982 case 32:
1983 // Only LA64 will get to here due to the size mismatch between VT and
1984 // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
1985 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1986 break;
1987 }
1988 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1989 break;
1990 }
1991 case ISD::BITREVERSE: {
1992 SDValue Src = N->getOperand(0);
1993 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1994 "Unexpected custom legalization");
1995 MVT GRLenVT = Subtarget.getGRLenVT();
1996 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1997 SDValue Tmp;
1998 switch (VT.getSizeInBits()) {
1999 default:
2000 llvm_unreachable("Unexpected operand width");
2001 case 8:
2002 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
2003 break;
2004 case 32:
2005 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
2006 break;
2007 }
2008 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2009 break;
2010 }
2011 case ISD::CTLZ:
2012 case ISD::CTTZ: {
2013 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2014 "Unexpected custom legalisation");
2015 Results.push_back(customLegalizeToWOp(N, DAG, 1));
2016 break;
2017 }
2018 case ISD::INTRINSIC_W_CHAIN: {
2019 SDValue Chain = N->getOperand(0);
2020 SDValue Op2 = N->getOperand(2);
2021 MVT GRLenVT = Subtarget.getGRLenVT();
2022 const StringRef ErrorMsgOOR = "argument out of range";
2023 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2024 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2025
2026 switch (N->getConstantOperandVal(1)) {
2027 default:
2028 llvm_unreachable("Unexpected Intrinsic.");
2029 case Intrinsic::loongarch_movfcsr2gr: {
2030 if (!Subtarget.hasBasicF()) {
2031 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
2032 return;
2033 }
2034 unsigned Imm = Op2->getAsZExtVal();
2035 if (!isUInt<2>(Imm)) {
2036 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2037 return;
2038 }
2039 SDValue MOVFCSR2GRResults = DAG.getNode(
2040 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
2041 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2042 Results.push_back(
2043 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
2044 Results.push_back(MOVFCSR2GRResults.getValue(1));
2045 break;
2046 }
2047#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
2048 case Intrinsic::loongarch_##NAME: { \
2049 SDValue NODE = DAG.getNode( \
2050 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2051 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2052 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2053 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2054 Results.push_back(NODE.getValue(1)); \
2055 break; \
2056 }
2057 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
2058 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
2059 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
2060 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
2061 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
2062 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
2063#undef CRC_CASE_EXT_BINARYOP
2064
2065#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
2066 case Intrinsic::loongarch_##NAME: { \
2067 SDValue NODE = DAG.getNode( \
2068 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2069 {Chain, Op2, \
2070 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2071 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2072 Results.push_back(NODE.getValue(1)); \
2073 break; \
2074 }
2075 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
2076 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
2077#undef CRC_CASE_EXT_UNARYOP
2078#define CSR_CASE(ID) \
2079 case Intrinsic::loongarch_##ID: { \
2080 if (!Subtarget.is64Bit()) \
2081 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
2082 break; \
2083 }
2084 CSR_CASE(csrrd_d);
2085 CSR_CASE(csrwr_d);
2086 CSR_CASE(csrxchg_d);
2087 CSR_CASE(iocsrrd_d);
2088#undef CSR_CASE
2089 case Intrinsic::loongarch_csrrd_w: {
2090 unsigned Imm = Op2->getAsZExtVal();
2091 if (!isUInt<14>(Imm)) {
2092 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2093 return;
2094 }
2095 SDValue CSRRDResults =
2096 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2097 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2098 Results.push_back(
2099 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
2100 Results.push_back(CSRRDResults.getValue(1));
2101 break;
2102 }
2103 case Intrinsic::loongarch_csrwr_w: {
2104 unsigned Imm = N->getConstantOperandVal(3);
2105 if (!isUInt<14>(Imm)) {
2106 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2107 return;
2108 }
2109 SDValue CSRWRResults =
2110 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2111 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2112 DAG.getConstant(Imm, DL, GRLenVT)});
2113 Results.push_back(
2114 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
2115 Results.push_back(CSRWRResults.getValue(1));
2116 break;
2117 }
2118 case Intrinsic::loongarch_csrxchg_w: {
2119 unsigned Imm = N->getConstantOperandVal(4);
2120 if (!isUInt<14>(Imm)) {
2121 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2122 return;
2123 }
2124 SDValue CSRXCHGResults = DAG.getNode(
2125 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2126 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2127 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
2128 DAG.getConstant(Imm, DL, GRLenVT)});
2129 Results.push_back(
2130 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
2131 Results.push_back(CSRXCHGResults.getValue(1));
2132 break;
2133 }
2134#define IOCSRRD_CASE(NAME, NODE) \
2135 case Intrinsic::loongarch_##NAME: { \
2136 SDValue IOCSRRDResults = \
2137 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2138 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
2139 Results.push_back( \
2140 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
2141 Results.push_back(IOCSRRDResults.getValue(1)); \
2142 break; \
2143 }
2144 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2145 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2146 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2147#undef IOCSRRD_CASE
2148 case Intrinsic::loongarch_cpucfg: {
2149 SDValue CPUCFGResults =
2150 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2151 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
2152 Results.push_back(
2153 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
2154 Results.push_back(CPUCFGResults.getValue(1));
2155 break;
2156 }
2157 case Intrinsic::loongarch_lddir_d: {
2158 if (!Subtarget.is64Bit()) {
2159 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
2160 return;
2161 }
2162 break;
2163 }
2164 }
2165 break;
2166 }
2167 case ISD::READ_REGISTER: {
2168 if (Subtarget.is64Bit())
2169 DAG.getContext()->emitError(
2170 "On LA64, only 64-bit registers can be read.");
2171 else
2172 DAG.getContext()->emitError(
2173 "On LA32, only 32-bit registers can be read.");
2174 Results.push_back(DAG.getUNDEF(VT));
2175 Results.push_back(N->getOperand(0));
2176 break;
2177 }
2178 case ISD::INTRINSIC_WO_CHAIN: {
2179 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
2180 break;
2181 }
2182 }
2183}
2184
2185 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
2186 TargetLowering::DAGCombinerInfo &DCI,
2187 const LoongArchSubtarget &Subtarget) {
2188 if (DCI.isBeforeLegalizeOps())
2189 return SDValue();
2190
2191 SDValue FirstOperand = N->getOperand(0);
2192 SDValue SecondOperand = N->getOperand(1);
2193 unsigned FirstOperandOpc = FirstOperand.getOpcode();
2194 EVT ValTy = N->getValueType(0);
2195 SDLoc DL(N);
2196 uint64_t lsb, msb;
2197 unsigned SMIdx, SMLen;
2198 ConstantSDNode *CN;
2199 SDValue NewOperand;
2200 MVT GRLenVT = Subtarget.getGRLenVT();
2201
2202 // Op's second operand must be a shifted mask.
2203 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
2204 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
2205 return SDValue();
2206
2207 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
2208 // Pattern match BSTRPICK.
2209 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
2210 // => BSTRPICK $dst, $src, msb, lsb
2211 // where msb = lsb + len - 1
2212
2213 // The second operand of the shift must be an immediate.
2214 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
2215 return SDValue();
2216
2217 lsb = CN->getZExtValue();
2218
2219 // Return if the shifted mask does not start at bit 0 or the sum of its
2220 // length and lsb exceeds the word's size.
2221 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
2222 return SDValue();
2223
2224 NewOperand = FirstOperand.getOperand(0);
2225 } else {
2226 // Pattern match BSTRPICK.
2227 // $dst = and $src, (2**len - 1), if len > 12
2228 // => BSTRPICK $dst, $src, msb, lsb
2229 // where lsb = 0 and msb = len - 1
2230
2231 // If the mask is <= 0xfff, andi can be used instead.
2232 if (CN->getZExtValue() <= 0xfff)
2233 return SDValue();
2234
2235 // Return if the mask's MSB exceeds the word size.
2236 if (SMIdx + SMLen > ValTy.getSizeInBits())
2237 return SDValue();
2238
2239 if (SMIdx > 0) {
2240 // Omit if the constant has more than 2 uses. This is a conservative
2241 // decision. Whether it is a win depends on the HW microarchitecture.
2242 // However, it should always be better for 1 and 2 uses.
2243 if (CN->use_size() > 2)
2244 return SDValue();
2245 // Return if the constant can be composed by a single LU12I.W.
2246 if ((CN->getZExtValue() & 0xfff) == 0)
2247 return SDValue();
2248 // Return if the constant can be composed by a single ADDI with
2249 // the zero register.
2250 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
2251 return SDValue();
2252 }
2253
2254 lsb = SMIdx;
2255 NewOperand = FirstOperand;
2256 }
2257
2258 msb = lsb + SMLen - 1;
2259 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
2260 DAG.getConstant(msb, DL, GRLenVT),
2261 DAG.getConstant(lsb, DL, GRLenVT));
2262 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
2263 return NR0;
2264 // Try to optimize to
2265 // bstrpick $Rd, $Rs, msb, lsb
2266 // slli $Rd, $Rd, lsb
2267 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
2268 DAG.getConstant(lsb, DL, GRLenVT));
2269}
2270
2271 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
2272 TargetLowering::DAGCombinerInfo &DCI,
2273 const LoongArchSubtarget &Subtarget) {
2274 if (DCI.isBeforeLegalizeOps())
2275 return SDValue();
2276
2277 // $dst = srl (and $src, Mask), Shamt
2278 // =>
2279 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
2280 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
2281 //
2282
2283 SDValue FirstOperand = N->getOperand(0);
2284 ConstantSDNode *CN;
2285 EVT ValTy = N->getValueType(0);
2286 SDLoc DL(N);
2287 MVT GRLenVT = Subtarget.getGRLenVT();
2288 unsigned MaskIdx, MaskLen;
2289 uint64_t Shamt;
2290
2291 // The first operand must be an AND and the second operand of the AND must be
2292 // a shifted mask.
2293 if (FirstOperand.getOpcode() != ISD::AND ||
2294 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
2295 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
2296 return SDValue();
2297
2298 // The second operand (shift amount) must be an immediate.
2299 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
2300 return SDValue();
2301
2302 Shamt = CN->getZExtValue();
2303 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
2304 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
2305 FirstOperand->getOperand(0),
2306 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2307 DAG.getConstant(Shamt, DL, GRLenVT));
2308
2309 return SDValue();
2310}
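// For example, with Mask = 0xff00 (MaskIdx = 8, MaskLen = 8) and Shamt = 8:
//   (srl (and $x, 0xff00), 8)  =>  (BSTRPICK $x, 15, 8)
// which extracts bits [15:8] of $x directly and saves the separate AND.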
2311
2312 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
2313 TargetLowering::DAGCombinerInfo &DCI,
2314 const LoongArchSubtarget &Subtarget) {
2315 MVT GRLenVT = Subtarget.getGRLenVT();
2316 EVT ValTy = N->getValueType(0);
2317 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2318 ConstantSDNode *CN0, *CN1;
2319 SDLoc DL(N);
2320 unsigned ValBits = ValTy.getSizeInBits();
2321 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
2322 unsigned Shamt;
2323 bool SwapAndRetried = false;
2324
2325 if (DCI.isBeforeLegalizeOps())
2326 return SDValue();
2327
2328 if (ValBits != 32 && ValBits != 64)
2329 return SDValue();
2330
2331Retry:
2332 // 1st pattern to match BSTRINS:
2333 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
2334 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
2335 // =>
2336 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
2337 if (N0.getOpcode() == ISD::AND &&
2338 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2339 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2340 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
2341 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2342 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2343 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
2344 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2345 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2346 (MaskIdx0 + MaskLen0 <= ValBits)) {
2347 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
2348 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2349 N1.getOperand(0).getOperand(0),
2350 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2351 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2352 }
2353
2354 // 2nd pattern to match BSTRINS:
2355 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
2356 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
2357 // =>
2358 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
2359 if (N0.getOpcode() == ISD::AND &&
2360 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2361 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2362 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2363 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2364 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2365 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2366 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2367 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
2368 (MaskIdx0 + MaskLen0 <= ValBits)) {
2369 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
2370 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2371 N1.getOperand(0).getOperand(0),
2372 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2373 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2374 }
2375
2376 // 3rd pattern to match BSTRINS:
2377 // R = or (and X, mask0), (and Y, mask1)
2378 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
2379 // =>
2380 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
2381 // where msb = lsb + size - 1
2382 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
2383 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2384 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2385 (MaskIdx0 + MaskLen0 <= 64) &&
2386 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
2387 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2388 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
2389 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2390 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
2391 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
2392 DAG.getConstant(ValBits == 32
2393 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2394 : (MaskIdx0 + MaskLen0 - 1),
2395 DL, GRLenVT),
2396 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2397 }
2398
2399 // 4th pattern to match BSTRINS:
2400 // R = or (and X, mask), (shl Y, shamt)
2401 // where mask = (2**shamt - 1)
2402 // =>
2403 // R = BSTRINS X, Y, ValBits - 1, shamt
2404 // where ValBits = 32 or 64
2405 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
2406 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2407 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
2408 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2409 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
2410 (MaskIdx0 + MaskLen0 <= ValBits)) {
2411 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
2412 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2413 N1.getOperand(0),
2414 DAG.getConstant((ValBits - 1), DL, GRLenVT),
2415 DAG.getConstant(Shamt, DL, GRLenVT));
2416 }
2417
2418 // 5th pattern to match BSTRINS:
2419 // R = or (and X, mask), const
2420 // where ~mask = (2**size - 1) << lsb, mask & const = 0
2421 // =>
2422 // R = BSTRINS X, (const >> lsb), msb, lsb
2423 // where msb = lsb + size - 1
2424 if (N0.getOpcode() == ISD::AND &&
2425 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2426 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2427 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
2428 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2429 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
2430 return DAG.getNode(
2431 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2432 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
2433 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2434 : (MaskIdx0 + MaskLen0 - 1),
2435 DL, GRLenVT),
2436 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2437 }
2438
2439 // 6th pattern.
2440 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
2441 // by the incoming bits are known to be zero.
2442 // =>
2443 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
2444 //
2445 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
2446 // pattern is more common than the 1st. So we put the 1st before the 6th in
2447 // order to match as many nodes as possible.
2448 ConstantSDNode *CNMask, *CNShamt;
2449 unsigned MaskIdx, MaskLen;
2450 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2451 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2452 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2453 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2454 CNShamt->getZExtValue() + MaskLen <= ValBits) {
2455 Shamt = CNShamt->getZExtValue();
2456 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
2457 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2458 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
2459 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2460 N1.getOperand(0).getOperand(0),
2461 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
2462 DAG.getConstant(Shamt, DL, GRLenVT));
2463 }
2464 }
2465
2466 // 7th pattern.
2467 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
2468 // overwritten by the incoming bits are known to be zero.
2469 // =>
2470 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
2471 //
2472 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
2473 // before the 7th in order to match as many nodes as possible.
2474 if (N1.getOpcode() == ISD::AND &&
2475 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2476 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2477 N1.getOperand(0).getOpcode() == ISD::SHL &&
2478 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2479 CNShamt->getZExtValue() == MaskIdx) {
2480 APInt ShMask(ValBits, CNMask->getZExtValue());
2481 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2482 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
2483 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2484 N1.getOperand(0).getOperand(0),
2485 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2486 DAG.getConstant(MaskIdx, DL, GRLenVT));
2487 }
2488 }
2489
2490 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
2491 if (!SwapAndRetried) {
2492 std::swap(N0, N1);
2493 SwapAndRetried = true;
2494 goto Retry;
2495 }
2496
2497 SwapAndRetried = false;
2498Retry2:
2499 // 8th pattern.
2500 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
2501 // the incoming bits are known to be zero.
2502 // =>
2503 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
2504 //
2505 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
2506 // we put it here in order to match as many nodes as possible or generate fewer
2507 // instructions.
2508 if (N1.getOpcode() == ISD::AND &&
2509 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2510 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
2511 APInt ShMask(ValBits, CNMask->getZExtValue());
2512 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2513 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
2514 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2515 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
2516 N1->getOperand(0),
2517 DAG.getConstant(MaskIdx, DL, GRLenVT)),
2518 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2519 DAG.getConstant(MaskIdx, DL, GRLenVT));
2520 }
2521 }
2522 // Swap N0/N1 and retry.
2523 if (!SwapAndRetried) {
2524 std::swap(N0, N1);
2525 SwapAndRetried = true;
2526 goto Retry2;
2527 }
2528
2529 return SDValue();
2530}
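// A worked instance of the 1st pattern, with 32-bit values, size = 8 and
// lsb = 8 (so mask1 = 0xff00 and mask0 = ~mask1 = 0xffff00ff):
//   (or (and $x, 0xffff00ff), (and (shl $y, 8), 0xff00))
//   =>  (BSTRINS $x, $y, 15, 8)
// i.e. bits [15:8] of $x are replaced by the low 8 bits of $y.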
2531
2532static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
2533 ExtType = ISD::NON_EXTLOAD;
2534
2535 switch (V.getNode()->getOpcode()) {
2536 case ISD::LOAD: {
2537 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
2538 if ((LoadNode->getMemoryVT() == MVT::i8) ||
2539 (LoadNode->getMemoryVT() == MVT::i16)) {
2540 ExtType = LoadNode->getExtensionType();
2541 return true;
2542 }
2543 return false;
2544 }
2545 case ISD::AssertSext: {
2546 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
2547 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
2548 ExtType = ISD::SEXTLOAD;
2549 return true;
2550 }
2551 return false;
2552 }
2553 case ISD::AssertZext: {
2554 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
2555 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
2556 ExtType = ISD::ZEXTLOAD;
2557 return true;
2558 }
2559 return false;
2560 }
2561 default:
2562 return false;
2563 }
2564
2565 return false;
2566}
2567
2568// Eliminate redundant truncation and zero-extension nodes.
2569// * Case 1:
2570// +------------+ +------------+ +------------+
2571// | Input1 | | Input2 | | CC |
2572// +------------+ +------------+ +------------+
2573// | | |
2574// V V +----+
2575// +------------+ +------------+ |
2576// | TRUNCATE | | TRUNCATE | |
2577// +------------+ +------------+ |
2578// | | |
2579// V V |
2580// +------------+ +------------+ |
2581// | ZERO_EXT | | ZERO_EXT | |
2582// +------------+ +------------+ |
2583// | | |
2584// | +-------------+ |
2585// V V | |
2586// +----------------+ | |
2587// | AND | | |
2588// +----------------+ | |
2589// | | |
2590// +---------------+ | |
2591// | | |
2592// V V V
2593// +-------------+
2594// | CMP |
2595// +-------------+
2596// * Case 2:
2597// +------------+ +------------+ +-------------+ +------------+ +------------+
2598// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
2599// +------------+ +------------+ +-------------+ +------------+ +------------+
2600// | | | | |
2601// V | | | |
2602// +------------+ | | | |
2603// | XOR |<---------------------+ | |
2604// +------------+ | | |
2605// | | | |
2606// V V +---------------+ |
2607// +------------+ +------------+ | |
2608// | TRUNCATE | | TRUNCATE | | +-------------------------+
2609// +------------+ +------------+ | |
2610// | | | |
2611// V V | |
2612// +------------+ +------------+ | |
2613// | ZERO_EXT | | ZERO_EXT | | |
2614// +------------+ +------------+ | |
2615// | | | |
2616// V V | |
2617// +----------------+ | |
2618// | AND | | |
2619// +----------------+ | |
2620// | | |
2621// +---------------+ | |
2622// | | |
2623// V V V
2624// +-------------+
2625// | CMP |
2626// +-------------+
2627 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
2628 TargetLowering::DAGCombinerInfo &DCI,
2629 const LoongArchSubtarget &Subtarget) {
2630 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2631
2632 SDNode *AndNode = N->getOperand(0).getNode();
2633 if (AndNode->getOpcode() != ISD::AND)
2634 return SDValue();
2635
2636 SDValue AndInputValue2 = AndNode->getOperand(1);
2637 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
2638 return SDValue();
2639
2640 SDValue CmpInputValue = N->getOperand(1);
2641 SDValue AndInputValue1 = AndNode->getOperand(0);
2642 if (AndInputValue1.getOpcode() == ISD::XOR) {
2643 if (CC != ISD::SETEQ && CC != ISD::SETNE)
2644 return SDValue();
2645 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
2646 if (!CN || CN->getSExtValue() != -1)
2647 return SDValue();
2648 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
2649 if (!CN || CN->getSExtValue() != 0)
2650 return SDValue();
2651 AndInputValue1 = AndInputValue1.getOperand(0);
2652 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
2653 return SDValue();
2654 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
2655 if (AndInputValue2 != CmpInputValue)
2656 return SDValue();
2657 } else {
2658 return SDValue();
2659 }
2660
2661 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
2662 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
2663 return SDValue();
2664
2665 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
2666 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
2667 return SDValue();
2668
2669 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
2670 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
2671 ISD::LoadExtType ExtType1;
2672 ISD::LoadExtType ExtType2;
2673
2674 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
2675 !checkValueWidth(TruncInputValue2, ExtType2))
2676 return SDValue();
2677
2678 if ((ExtType2 != ISD::ZEXTLOAD) &&
2679 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
2680 return SDValue();
2681
2682 // These truncation and zero-extension nodes are not necessary; remove them.
2683 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
2684 TruncInputValue1, TruncInputValue2);
2685 SDValue NewSetCC =
2686 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
2687 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
2688 return SDValue(N, 0);
2689}
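// For example (Case 1), when %a and %b are narrow (i8/i16) loads wrapped in
// (zext (trunc ...)):
//   setcc (and (zext (trunc %a)), (zext (trunc %b))), (zext (trunc %b)), cc
//   =>  setcc (and %a, %b), %b, cc
// The extending loads already pin down the high bits, so the trunc/zext
// pairs carry no information and can be dropped.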
2690
2691// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
2692 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
2693 TargetLowering::DAGCombinerInfo &DCI,
2694 const LoongArchSubtarget &Subtarget) {
2695 if (DCI.isBeforeLegalizeOps())
2696 return SDValue();
2697
2698 SDValue Src = N->getOperand(0);
2699 if (Src.getOpcode() != LoongArchISD::REVB_2W)
2700 return SDValue();
2701
2702 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
2703 Src.getOperand(0));
2704}
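// This holds because reversing all 32 bits is the same as reversing the byte
// order and then reversing the bits within each byte; composing it with a
// prior byte reverse therefore leaves exactly the per-byte bit reverse:
//   bitrev.w(revb.2w x) == bitrev.4b x
// so the two-instruction sequence collapses into one.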
2705
2706template <unsigned N>
2707 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
2708 SelectionDAG &DAG,
2709 const LoongArchSubtarget &Subtarget,
2710 bool IsSigned = false) {
2711 SDLoc DL(Node);
2712 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2713 // Check the ImmArg.
2714 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2715 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2716 DAG.getContext()->emitError(Node->getOperationName(0) +
2717 ": argument out of range.");
2718 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
2719 }
2720 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
2721}
2722
2723template <unsigned N>
2724static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
2725 SelectionDAG &DAG, bool IsSigned = false) {
2726 SDLoc DL(Node);
2727 EVT ResTy = Node->getValueType(0);
2728 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2729
2730 // Check the ImmArg.
2731 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2732 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2733 DAG.getContext()->emitError(Node->getOperationName(0) +
2734 ": argument out of range.");
2735 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2736 }
2737 return DAG.getConstant(
2738 APInt(ResTy.getScalarType().getSizeInBits(),
2739 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
2740 DL, ResTy);
2741}
2742
2743 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
2744 SDLoc DL(Node);
2745 EVT ResTy = Node->getValueType(0);
2746 SDValue Vec = Node->getOperand(2);
2747 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
2748 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
2749}
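// For example, for a v16i8 shift the per-element shift amounts in operand 2
// are masked with 7. This matches the hardware's modulo-element-width
// treatment of shift amounts and keeps the generic ISD shift nodes, which
// require in-range amounts, well-defined.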
2750
2751 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
2752 SDLoc DL(Node);
2753 EVT ResTy = Node->getValueType(0);
2754 SDValue One = DAG.getConstant(1, DL, ResTy);
2755 SDValue Bit =
2756 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
2757
2758 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
2759 DAG.getNOT(DL, Bit, ResTy));
2760}
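// For example, `vbitclr.w %v, %w` computes, per 32-bit element,
//   %v & ~(1 << (%w & 31))
// which is exactly the SHL/NOT/AND expansion built above.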
2761
2762template <unsigned N>
2763 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
2764 SDLoc DL(Node);
2765 EVT ResTy = Node->getValueType(0);
2766 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2767 // Check the unsigned ImmArg.
2768 if (!isUInt<N>(CImm->getZExtValue())) {
2769 DAG.getContext()->emitError(Node->getOperationName(0) +
2770 ": argument out of range.");
2771 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2772 }
2773
2774 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2775 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
2776
2777 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
2778}
2779
2780template <unsigned N>
2781 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
2782 SDLoc DL(Node);
2783 EVT ResTy = Node->getValueType(0);
2784 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2785 // Check the unsigned ImmArg.
2786 if (!isUInt<N>(CImm->getZExtValue())) {
2787 DAG.getContext()->emitError(Node->getOperationName(0) +
2788 ": argument out of range.");
2789 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2790 }
2791
2792 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2793 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2794 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
2795}
2796
2797template <unsigned N>
2798 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
2799 SDLoc DL(Node);
2800 EVT ResTy = Node->getValueType(0);
2801 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2802 // Check the unsigned ImmArg.
2803 if (!isUInt<N>(CImm->getZExtValue())) {
2804 DAG.getContext()->emitError(Node->getOperationName(0) +
2805 ": argument out of range.");
2806 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2807 }
2808
2809 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2810 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2811 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
2812}
2813
2814static SDValue
2815 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
2816 TargetLowering::DAGCombinerInfo &DCI,
2817 const LoongArchSubtarget &Subtarget) {
2818 SDLoc DL(N);
2819 switch (N->getConstantOperandVal(0)) {
2820 default:
2821 break;
2822 case Intrinsic::loongarch_lsx_vadd_b:
2823 case Intrinsic::loongarch_lsx_vadd_h:
2824 case Intrinsic::loongarch_lsx_vadd_w:
2825 case Intrinsic::loongarch_lsx_vadd_d:
2826 case Intrinsic::loongarch_lasx_xvadd_b:
2827 case Intrinsic::loongarch_lasx_xvadd_h:
2828 case Intrinsic::loongarch_lasx_xvadd_w:
2829 case Intrinsic::loongarch_lasx_xvadd_d:
2830 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2831 N->getOperand(2));
2832 case Intrinsic::loongarch_lsx_vaddi_bu:
2833 case Intrinsic::loongarch_lsx_vaddi_hu:
2834 case Intrinsic::loongarch_lsx_vaddi_wu:
2835 case Intrinsic::loongarch_lsx_vaddi_du:
2836 case Intrinsic::loongarch_lasx_xvaddi_bu:
2837 case Intrinsic::loongarch_lasx_xvaddi_hu:
2838 case Intrinsic::loongarch_lasx_xvaddi_wu:
2839 case Intrinsic::loongarch_lasx_xvaddi_du:
2840 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2841 lowerVectorSplatImm<5>(N, 2, DAG));
2842 case Intrinsic::loongarch_lsx_vsub_b:
2843 case Intrinsic::loongarch_lsx_vsub_h:
2844 case Intrinsic::loongarch_lsx_vsub_w:
2845 case Intrinsic::loongarch_lsx_vsub_d:
2846 case Intrinsic::loongarch_lasx_xvsub_b:
2847 case Intrinsic::loongarch_lasx_xvsub_h:
2848 case Intrinsic::loongarch_lasx_xvsub_w:
2849 case Intrinsic::loongarch_lasx_xvsub_d:
2850 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2851 N->getOperand(2));
2852 case Intrinsic::loongarch_lsx_vsubi_bu:
2853 case Intrinsic::loongarch_lsx_vsubi_hu:
2854 case Intrinsic::loongarch_lsx_vsubi_wu:
2855 case Intrinsic::loongarch_lsx_vsubi_du:
2856 case Intrinsic::loongarch_lasx_xvsubi_bu:
2857 case Intrinsic::loongarch_lasx_xvsubi_hu:
2858 case Intrinsic::loongarch_lasx_xvsubi_wu:
2859 case Intrinsic::loongarch_lasx_xvsubi_du:
2860 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2861 lowerVectorSplatImm<5>(N, 2, DAG));
2862 case Intrinsic::loongarch_lsx_vneg_b:
2863 case Intrinsic::loongarch_lsx_vneg_h:
2864 case Intrinsic::loongarch_lsx_vneg_w:
2865 case Intrinsic::loongarch_lsx_vneg_d:
2866 case Intrinsic::loongarch_lasx_xvneg_b:
2867 case Intrinsic::loongarch_lasx_xvneg_h:
2868 case Intrinsic::loongarch_lasx_xvneg_w:
2869 case Intrinsic::loongarch_lasx_xvneg_d:
2870 return DAG.getNode(
2871 ISD::SUB, DL, N->getValueType(0),
2872 DAG.getConstant(
2873 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
2874 /*isSigned=*/true),
2875 SDLoc(N), N->getValueType(0)),
2876 N->getOperand(1));
2877 case Intrinsic::loongarch_lsx_vmax_b:
2878 case Intrinsic::loongarch_lsx_vmax_h:
2879 case Intrinsic::loongarch_lsx_vmax_w:
2880 case Intrinsic::loongarch_lsx_vmax_d:
2881 case Intrinsic::loongarch_lasx_xvmax_b:
2882 case Intrinsic::loongarch_lasx_xvmax_h:
2883 case Intrinsic::loongarch_lasx_xvmax_w:
2884 case Intrinsic::loongarch_lasx_xvmax_d:
2885 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2886 N->getOperand(2));
2887 case Intrinsic::loongarch_lsx_vmax_bu:
2888 case Intrinsic::loongarch_lsx_vmax_hu:
2889 case Intrinsic::loongarch_lsx_vmax_wu:
2890 case Intrinsic::loongarch_lsx_vmax_du:
2891 case Intrinsic::loongarch_lasx_xvmax_bu:
2892 case Intrinsic::loongarch_lasx_xvmax_hu:
2893 case Intrinsic::loongarch_lasx_xvmax_wu:
2894 case Intrinsic::loongarch_lasx_xvmax_du:
2895 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2896 N->getOperand(2));
2897 case Intrinsic::loongarch_lsx_vmaxi_b:
2898 case Intrinsic::loongarch_lsx_vmaxi_h:
2899 case Intrinsic::loongarch_lsx_vmaxi_w:
2900 case Intrinsic::loongarch_lsx_vmaxi_d:
2901 case Intrinsic::loongarch_lasx_xvmaxi_b:
2902 case Intrinsic::loongarch_lasx_xvmaxi_h:
2903 case Intrinsic::loongarch_lasx_xvmaxi_w:
2904 case Intrinsic::loongarch_lasx_xvmaxi_d:
2905 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2906 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2907 case Intrinsic::loongarch_lsx_vmaxi_bu:
2908 case Intrinsic::loongarch_lsx_vmaxi_hu:
2909 case Intrinsic::loongarch_lsx_vmaxi_wu:
2910 case Intrinsic::loongarch_lsx_vmaxi_du:
2911 case Intrinsic::loongarch_lasx_xvmaxi_bu:
2912 case Intrinsic::loongarch_lasx_xvmaxi_hu:
2913 case Intrinsic::loongarch_lasx_xvmaxi_wu:
2914 case Intrinsic::loongarch_lasx_xvmaxi_du:
2915 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2916 lowerVectorSplatImm<5>(N, 2, DAG));
2917 case Intrinsic::loongarch_lsx_vmin_b:
2918 case Intrinsic::loongarch_lsx_vmin_h:
2919 case Intrinsic::loongarch_lsx_vmin_w:
2920 case Intrinsic::loongarch_lsx_vmin_d:
2921 case Intrinsic::loongarch_lasx_xvmin_b:
2922 case Intrinsic::loongarch_lasx_xvmin_h:
2923 case Intrinsic::loongarch_lasx_xvmin_w:
2924 case Intrinsic::loongarch_lasx_xvmin_d:
2925 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2926 N->getOperand(2));
2927 case Intrinsic::loongarch_lsx_vmin_bu:
2928 case Intrinsic::loongarch_lsx_vmin_hu:
2929 case Intrinsic::loongarch_lsx_vmin_wu:
2930 case Intrinsic::loongarch_lsx_vmin_du:
2931 case Intrinsic::loongarch_lasx_xvmin_bu:
2932 case Intrinsic::loongarch_lasx_xvmin_hu:
2933 case Intrinsic::loongarch_lasx_xvmin_wu:
2934 case Intrinsic::loongarch_lasx_xvmin_du:
2935 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2936 N->getOperand(2));
2937 case Intrinsic::loongarch_lsx_vmini_b:
2938 case Intrinsic::loongarch_lsx_vmini_h:
2939 case Intrinsic::loongarch_lsx_vmini_w:
2940 case Intrinsic::loongarch_lsx_vmini_d:
2941 case Intrinsic::loongarch_lasx_xvmini_b:
2942 case Intrinsic::loongarch_lasx_xvmini_h:
2943 case Intrinsic::loongarch_lasx_xvmini_w:
2944 case Intrinsic::loongarch_lasx_xvmini_d:
2945 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2946 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2947 case Intrinsic::loongarch_lsx_vmini_bu:
2948 case Intrinsic::loongarch_lsx_vmini_hu:
2949 case Intrinsic::loongarch_lsx_vmini_wu:
2950 case Intrinsic::loongarch_lsx_vmini_du:
2951 case Intrinsic::loongarch_lasx_xvmini_bu:
2952 case Intrinsic::loongarch_lasx_xvmini_hu:
2953 case Intrinsic::loongarch_lasx_xvmini_wu:
2954 case Intrinsic::loongarch_lasx_xvmini_du:
2955 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2956 lowerVectorSplatImm<5>(N, 2, DAG));
2957 case Intrinsic::loongarch_lsx_vmul_b:
2958 case Intrinsic::loongarch_lsx_vmul_h:
2959 case Intrinsic::loongarch_lsx_vmul_w:
2960 case Intrinsic::loongarch_lsx_vmul_d:
2961 case Intrinsic::loongarch_lasx_xvmul_b:
2962 case Intrinsic::loongarch_lasx_xvmul_h:
2963 case Intrinsic::loongarch_lasx_xvmul_w:
2964 case Intrinsic::loongarch_lasx_xvmul_d:
2965 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
2966 N->getOperand(2));
2967 case Intrinsic::loongarch_lsx_vmadd_b:
2968 case Intrinsic::loongarch_lsx_vmadd_h:
2969 case Intrinsic::loongarch_lsx_vmadd_w:
2970 case Intrinsic::loongarch_lsx_vmadd_d:
2971 case Intrinsic::loongarch_lasx_xvmadd_b:
2972 case Intrinsic::loongarch_lasx_xvmadd_h:
2973 case Intrinsic::loongarch_lasx_xvmadd_w:
2974 case Intrinsic::loongarch_lasx_xvmadd_d: {
2975 EVT ResTy = N->getValueType(0);
2976 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
2977 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2978 N->getOperand(3)));
2979 }
2980 case Intrinsic::loongarch_lsx_vmsub_b:
2981 case Intrinsic::loongarch_lsx_vmsub_h:
2982 case Intrinsic::loongarch_lsx_vmsub_w:
2983 case Intrinsic::loongarch_lsx_vmsub_d:
2984 case Intrinsic::loongarch_lasx_xvmsub_b:
2985 case Intrinsic::loongarch_lasx_xvmsub_h:
2986 case Intrinsic::loongarch_lasx_xvmsub_w:
2987 case Intrinsic::loongarch_lasx_xvmsub_d: {
2988 EVT ResTy = N->getValueType(0);
2989 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
2990 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2991 N->getOperand(3)));
2992 }
2993 case Intrinsic::loongarch_lsx_vdiv_b:
2994 case Intrinsic::loongarch_lsx_vdiv_h:
2995 case Intrinsic::loongarch_lsx_vdiv_w:
2996 case Intrinsic::loongarch_lsx_vdiv_d:
2997 case Intrinsic::loongarch_lasx_xvdiv_b:
2998 case Intrinsic::loongarch_lasx_xvdiv_h:
2999 case Intrinsic::loongarch_lasx_xvdiv_w:
3000 case Intrinsic::loongarch_lasx_xvdiv_d:
3001 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
3002 N->getOperand(2));
3003 case Intrinsic::loongarch_lsx_vdiv_bu:
3004 case Intrinsic::loongarch_lsx_vdiv_hu:
3005 case Intrinsic::loongarch_lsx_vdiv_wu:
3006 case Intrinsic::loongarch_lsx_vdiv_du:
3007 case Intrinsic::loongarch_lasx_xvdiv_bu:
3008 case Intrinsic::loongarch_lasx_xvdiv_hu:
3009 case Intrinsic::loongarch_lasx_xvdiv_wu:
3010 case Intrinsic::loongarch_lasx_xvdiv_du:
3011 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
3012 N->getOperand(2));
3013 case Intrinsic::loongarch_lsx_vmod_b:
3014 case Intrinsic::loongarch_lsx_vmod_h:
3015 case Intrinsic::loongarch_lsx_vmod_w:
3016 case Intrinsic::loongarch_lsx_vmod_d:
3017 case Intrinsic::loongarch_lasx_xvmod_b:
3018 case Intrinsic::loongarch_lasx_xvmod_h:
3019 case Intrinsic::loongarch_lasx_xvmod_w:
3020 case Intrinsic::loongarch_lasx_xvmod_d:
3021 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
3022 N->getOperand(2));
3023 case Intrinsic::loongarch_lsx_vmod_bu:
3024 case Intrinsic::loongarch_lsx_vmod_hu:
3025 case Intrinsic::loongarch_lsx_vmod_wu:
3026 case Intrinsic::loongarch_lsx_vmod_du:
3027 case Intrinsic::loongarch_lasx_xvmod_bu:
3028 case Intrinsic::loongarch_lasx_xvmod_hu:
3029 case Intrinsic::loongarch_lasx_xvmod_wu:
3030 case Intrinsic::loongarch_lasx_xvmod_du:
3031 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
3032 N->getOperand(2));
3033 case Intrinsic::loongarch_lsx_vand_v:
3034 case Intrinsic::loongarch_lasx_xvand_v:
3035 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3036 N->getOperand(2));
3037 case Intrinsic::loongarch_lsx_vor_v:
3038 case Intrinsic::loongarch_lasx_xvor_v:
3039 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3040 N->getOperand(2));
3041 case Intrinsic::loongarch_lsx_vxor_v:
3042 case Intrinsic::loongarch_lasx_xvxor_v:
3043 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
3044 N->getOperand(2));
3045 case Intrinsic::loongarch_lsx_vnor_v:
3046 case Intrinsic::loongarch_lasx_xvnor_v: {
3047 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3048 N->getOperand(2));
3049 return DAG.getNOT(DL, Res, Res->getValueType(0));
3050 }
3051 case Intrinsic::loongarch_lsx_vandi_b:
3052 case Intrinsic::loongarch_lasx_xvandi_b:
3053 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3054 lowerVectorSplatImm<8>(N, 2, DAG));
3055 case Intrinsic::loongarch_lsx_vori_b:
3056 case Intrinsic::loongarch_lasx_xvori_b:
3057 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3058 lowerVectorSplatImm<8>(N, 2, DAG));
3059 case Intrinsic::loongarch_lsx_vxori_b:
3060 case Intrinsic::loongarch_lasx_xvxori_b:
3061 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
3062 lowerVectorSplatImm<8>(N, 2, DAG));
3063 case Intrinsic::loongarch_lsx_vsll_b:
3064 case Intrinsic::loongarch_lsx_vsll_h:
3065 case Intrinsic::loongarch_lsx_vsll_w:
3066 case Intrinsic::loongarch_lsx_vsll_d:
3067 case Intrinsic::loongarch_lasx_xvsll_b:
3068 case Intrinsic::loongarch_lasx_xvsll_h:
3069 case Intrinsic::loongarch_lasx_xvsll_w:
3070 case Intrinsic::loongarch_lasx_xvsll_d:
3071 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3072 truncateVecElts(N, DAG));
3073 case Intrinsic::loongarch_lsx_vslli_b:
3074 case Intrinsic::loongarch_lasx_xvslli_b:
3075 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3076 lowerVectorSplatImm<3>(N, 2, DAG));
3077 case Intrinsic::loongarch_lsx_vslli_h:
3078 case Intrinsic::loongarch_lasx_xvslli_h:
3079 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3080 lowerVectorSplatImm<4>(N, 2, DAG));
3081 case Intrinsic::loongarch_lsx_vslli_w:
3082 case Intrinsic::loongarch_lasx_xvslli_w:
3083 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3084 lowerVectorSplatImm<5>(N, 2, DAG));
3085 case Intrinsic::loongarch_lsx_vslli_d:
3086 case Intrinsic::loongarch_lasx_xvslli_d:
3087 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3088 lowerVectorSplatImm<6>(N, 2, DAG));
3089 case Intrinsic::loongarch_lsx_vsrl_b:
3090 case Intrinsic::loongarch_lsx_vsrl_h:
3091 case Intrinsic::loongarch_lsx_vsrl_w:
3092 case Intrinsic::loongarch_lsx_vsrl_d:
3093 case Intrinsic::loongarch_lasx_xvsrl_b:
3094 case Intrinsic::loongarch_lasx_xvsrl_h:
3095 case Intrinsic::loongarch_lasx_xvsrl_w:
3096 case Intrinsic::loongarch_lasx_xvsrl_d:
3097 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
3098 truncateVecElts(N, DAG));
3099 case Intrinsic::loongarch_lsx_vsrli_b:
3100 case Intrinsic::loongarch_lasx_xvsrli_b:
3101 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
3102 lowerVectorSplatImm<3>(N, 2, DAG));
3103 case Intrinsic::loongarch_lsx_vsrli_h:
3104 case Intrinsic::loongarch_lasx_xvsrli_h:
3105 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
3106 lowerVectorSplatImm<4>(N, 2, DAG));
3107 case Intrinsic::loongarch_lsx_vsrli_w:
3108 case Intrinsic::loongarch_lasx_xvsrli_w:
3109 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
3110 lowerVectorSplatImm<5>(N, 2, DAG));
3111 case Intrinsic::loongarch_lsx_vsrli_d:
3112 case Intrinsic::loongarch_lasx_xvsrli_d:
3113 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
3114 lowerVectorSplatImm<6>(N, 2, DAG));
3115 case Intrinsic::loongarch_lsx_vsra_b:
3116 case Intrinsic::loongarch_lsx_vsra_h:
3117 case Intrinsic::loongarch_lsx_vsra_w:
3118 case Intrinsic::loongarch_lsx_vsra_d:
3119 case Intrinsic::loongarch_lasx_xvsra_b:
3120 case Intrinsic::loongarch_lasx_xvsra_h:
3121 case Intrinsic::loongarch_lasx_xvsra_w:
3122 case Intrinsic::loongarch_lasx_xvsra_d:
3123 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
3124 truncateVecElts(N, DAG));
3125 case Intrinsic::loongarch_lsx_vsrai_b:
3126 case Intrinsic::loongarch_lasx_xvsrai_b:
3127 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
3128 lowerVectorSplatImm<3>(N, 2, DAG));
3129 case Intrinsic::loongarch_lsx_vsrai_h:
3130 case Intrinsic::loongarch_lasx_xvsrai_h:
3131 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
3132 lowerVectorSplatImm<4>(N, 2, DAG));
3133 case Intrinsic::loongarch_lsx_vsrai_w:
3134 case Intrinsic::loongarch_lasx_xvsrai_w:
3135 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
3136 lowerVectorSplatImm<5>(N, 2, DAG));
3137 case Intrinsic::loongarch_lsx_vsrai_d:
3138 case Intrinsic::loongarch_lasx_xvsrai_d:
3139 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
3140 lowerVectorSplatImm<6>(N, 2, DAG));
3141 case Intrinsic::loongarch_lsx_vclz_b:
3142 case Intrinsic::loongarch_lsx_vclz_h:
3143 case Intrinsic::loongarch_lsx_vclz_w:
3144 case Intrinsic::loongarch_lsx_vclz_d:
3145 case Intrinsic::loongarch_lasx_xvclz_b:
3146 case Intrinsic::loongarch_lasx_xvclz_h:
3147 case Intrinsic::loongarch_lasx_xvclz_w:
3148 case Intrinsic::loongarch_lasx_xvclz_d:
3149 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
3150 case Intrinsic::loongarch_lsx_vpcnt_b:
3151 case Intrinsic::loongarch_lsx_vpcnt_h:
3152 case Intrinsic::loongarch_lsx_vpcnt_w:
3153 case Intrinsic::loongarch_lsx_vpcnt_d:
3154 case Intrinsic::loongarch_lasx_xvpcnt_b:
3155 case Intrinsic::loongarch_lasx_xvpcnt_h:
3156 case Intrinsic::loongarch_lasx_xvpcnt_w:
3157 case Intrinsic::loongarch_lasx_xvpcnt_d:
3158 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
3159 case Intrinsic::loongarch_lsx_vbitclr_b:
3160 case Intrinsic::loongarch_lsx_vbitclr_h:
3161 case Intrinsic::loongarch_lsx_vbitclr_w:
3162 case Intrinsic::loongarch_lsx_vbitclr_d:
3163 case Intrinsic::loongarch_lasx_xvbitclr_b:
3164 case Intrinsic::loongarch_lasx_xvbitclr_h:
3165 case Intrinsic::loongarch_lasx_xvbitclr_w:
3166 case Intrinsic::loongarch_lasx_xvbitclr_d:
3167 return lowerVectorBitClear(N, DAG);
3168 case Intrinsic::loongarch_lsx_vbitclri_b:
3169 case Intrinsic::loongarch_lasx_xvbitclri_b:
3170 return lowerVectorBitClearImm<3>(N, DAG);
3171 case Intrinsic::loongarch_lsx_vbitclri_h:
3172 case Intrinsic::loongarch_lasx_xvbitclri_h:
3173 return lowerVectorBitClearImm<4>(N, DAG);
3174 case Intrinsic::loongarch_lsx_vbitclri_w:
3175 case Intrinsic::loongarch_lasx_xvbitclri_w:
3176 return lowerVectorBitClearImm<5>(N, DAG);
3177 case Intrinsic::loongarch_lsx_vbitclri_d:
3178 case Intrinsic::loongarch_lasx_xvbitclri_d:
3179 return lowerVectorBitClearImm<6>(N, DAG);
3180 case Intrinsic::loongarch_lsx_vbitset_b:
3181 case Intrinsic::loongarch_lsx_vbitset_h:
3182 case Intrinsic::loongarch_lsx_vbitset_w:
3183 case Intrinsic::loongarch_lsx_vbitset_d:
3184 case Intrinsic::loongarch_lasx_xvbitset_b:
3185 case Intrinsic::loongarch_lasx_xvbitset_h:
3186 case Intrinsic::loongarch_lasx_xvbitset_w:
3187 case Intrinsic::loongarch_lasx_xvbitset_d: {
3188 EVT VecTy = N->getValueType(0);
3189 SDValue One = DAG.getConstant(1, DL, VecTy);
3190 return DAG.getNode(
3191 ISD::OR, DL, VecTy, N->getOperand(1),
3192 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
3193 }
3194 case Intrinsic::loongarch_lsx_vbitseti_b:
3195 case Intrinsic::loongarch_lasx_xvbitseti_b:
3196 return lowerVectorBitSetImm<3>(N, DAG);
3197 case Intrinsic::loongarch_lsx_vbitseti_h:
3198 case Intrinsic::loongarch_lasx_xvbitseti_h:
3199 return lowerVectorBitSetImm<4>(N, DAG);
3200 case Intrinsic::loongarch_lsx_vbitseti_w:
3201 case Intrinsic::loongarch_lasx_xvbitseti_w:
3202 return lowerVectorBitSetImm<5>(N, DAG);
3203 case Intrinsic::loongarch_lsx_vbitseti_d:
3204 case Intrinsic::loongarch_lasx_xvbitseti_d:
3205 return lowerVectorBitSetImm<6>(N, DAG);
3206 case Intrinsic::loongarch_lsx_vbitrev_b:
3207 case Intrinsic::loongarch_lsx_vbitrev_h:
3208 case Intrinsic::loongarch_lsx_vbitrev_w:
3209 case Intrinsic::loongarch_lsx_vbitrev_d:
3210 case Intrinsic::loongarch_lasx_xvbitrev_b:
3211 case Intrinsic::loongarch_lasx_xvbitrev_h:
3212 case Intrinsic::loongarch_lasx_xvbitrev_w:
3213 case Intrinsic::loongarch_lasx_xvbitrev_d: {
3214 EVT VecTy = N->getValueType(0);
3215 SDValue One = DAG.getConstant(1, DL, VecTy);
3216 return DAG.getNode(
3217 ISD::XOR, DL, VecTy, N->getOperand(1),
3218 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
3219 }
3220 case Intrinsic::loongarch_lsx_vbitrevi_b:
3221 case Intrinsic::loongarch_lasx_xvbitrevi_b:
3222 return lowerVectorBitRevImm<3>(N, DAG);
3223 case Intrinsic::loongarch_lsx_vbitrevi_h:
3224 case Intrinsic::loongarch_lasx_xvbitrevi_h:
3225 return lowerVectorBitRevImm<4>(N, DAG);
3226 case Intrinsic::loongarch_lsx_vbitrevi_w:
3227 case Intrinsic::loongarch_lasx_xvbitrevi_w:
3228 return lowerVectorBitRevImm<5>(N, DAG);
3229 case Intrinsic::loongarch_lsx_vbitrevi_d:
3230 case Intrinsic::loongarch_lasx_xvbitrevi_d:
3231 return lowerVectorBitRevImm<6>(N, DAG);
3232 case Intrinsic::loongarch_lsx_vfadd_s:
3233 case Intrinsic::loongarch_lsx_vfadd_d:
3234 case Intrinsic::loongarch_lasx_xvfadd_s:
3235 case Intrinsic::loongarch_lasx_xvfadd_d:
3236 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
3237 N->getOperand(2));
3238 case Intrinsic::loongarch_lsx_vfsub_s:
3239 case Intrinsic::loongarch_lsx_vfsub_d:
3240 case Intrinsic::loongarch_lasx_xvfsub_s:
3241 case Intrinsic::loongarch_lasx_xvfsub_d:
3242 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
3243 N->getOperand(2));
3244 case Intrinsic::loongarch_lsx_vfmul_s:
3245 case Intrinsic::loongarch_lsx_vfmul_d:
3246 case Intrinsic::loongarch_lasx_xvfmul_s:
3247 case Intrinsic::loongarch_lasx_xvfmul_d:
3248 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
3249 N->getOperand(2));
3250 case Intrinsic::loongarch_lsx_vfdiv_s:
3251 case Intrinsic::loongarch_lsx_vfdiv_d:
3252 case Intrinsic::loongarch_lasx_xvfdiv_s:
3253 case Intrinsic::loongarch_lasx_xvfdiv_d:
3254 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
3255 N->getOperand(2));
3256 case Intrinsic::loongarch_lsx_vfmadd_s:
3257 case Intrinsic::loongarch_lsx_vfmadd_d:
3258 case Intrinsic::loongarch_lasx_xvfmadd_s:
3259 case Intrinsic::loongarch_lasx_xvfmadd_d:
3260 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
3261 N->getOperand(2), N->getOperand(3));
3262 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
3263 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3264 N->getOperand(1), N->getOperand(2),
3265 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
3266 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
3267 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
3268 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3269 N->getOperand(1), N->getOperand(2),
3270 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
3271 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
3272 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
3273 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3274 N->getOperand(1), N->getOperand(2),
3275 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
3276 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
3277 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3278 N->getOperand(1), N->getOperand(2),
3279 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
3280 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
3281 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
3282 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
3283 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
3284 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
3285 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
3286 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
3287 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
3288 EVT ResTy = N->getValueType(0);
3289 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
3290 return DAG.getBuildVector(ResTy, DL, Ops);
3291 }
3292 case Intrinsic::loongarch_lsx_vreplve_b:
3293 case Intrinsic::loongarch_lsx_vreplve_h:
3294 case Intrinsic::loongarch_lsx_vreplve_w:
3295 case Intrinsic::loongarch_lsx_vreplve_d:
3296 case Intrinsic::loongarch_lasx_xvreplve_b:
3297 case Intrinsic::loongarch_lasx_xvreplve_h:
3298 case Intrinsic::loongarch_lasx_xvreplve_w:
3299 case Intrinsic::loongarch_lasx_xvreplve_d:
3300 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
3301 N->getOperand(1),
3302 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
3303 N->getOperand(2)));
3304 }
3305 return SDValue();
3306}
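// A sketch of what the arithmetic-intrinsic cases above accomplish
// (illustrative IR, not part of this file): a call such as
//   %r = call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> %a,
//                                                     <4 x float> %b)
// becomes the generic equivalent of
//   %r = fadd <4 x float> %a, %b
// so that target-independent DAG combines can apply to it afterwards.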
3307
3308SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
3309 DAGCombinerInfo &DCI) const {
3310 SelectionDAG &DAG = DCI.DAG;
3311 switch (N->getOpcode()) {
3312 default:
3313 break;
3314 case ISD::AND:
3315 return performANDCombine(N, DAG, DCI, Subtarget);
3316 case ISD::OR:
3317 return performORCombine(N, DAG, DCI, Subtarget);
3318 case ISD::SETCC:
3319 return performSETCCCombine(N, DAG, DCI, Subtarget);
3320 case ISD::SRL:
3321 return performSRLCombine(N, DAG, DCI, Subtarget);
3322 case LoongArchISD::BITREV_W:
3323 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
3324 case ISD::INTRINSIC_WO_CHAIN:
3325 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
3326 }
3327 return SDValue();
3328}
3329
3330static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
3331 MachineBasicBlock *MBB) {
3332 if (!ZeroDivCheck)
3333 return MBB;
3334
3335 // Build instructions:
3336 // MBB:
3337 // div(or mod) $dst, $dividend, $divisor
3338 // bnez $divisor, SinkMBB
3339 // BreakMBB:
3340 // break 7 // BRK_DIVZERO
3341 // SinkMBB:
3342 // fallthrough
3343 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
3344 MachineFunction::iterator It = ++MBB->getIterator();
3345 MachineFunction *MF = MBB->getParent();
3346 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3347 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3348 MF->insert(It, BreakMBB);
3349 MF->insert(It, SinkMBB);
3350
3351 // Transfer the remainder of MBB and its successor edges to SinkMBB.
3352 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
3353 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
3354
3355 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
3356 DebugLoc DL = MI.getDebugLoc();
3357 MachineOperand &Divisor = MI.getOperand(2);
3358 Register DivisorReg = Divisor.getReg();
3359
3360 // MBB:
3361 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
3362 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
3363 .addMBB(SinkMBB);
3364 MBB->addSuccessor(BreakMBB);
3365 MBB->addSuccessor(SinkMBB);
3366
3367 // BreakMBB:
3368 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
3369 // definition of BRK_DIVZERO.
3370 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
3371 BreakMBB->addSuccessor(SinkMBB);
3372
3373 // Clear Divisor's kill flag.
3374 Divisor.setIsKill(false);
3375
3376 return SinkMBB;
3377}
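// Roughly the machine code this yields for `a / b` on LA64 when
// -loongarch-check-zero-division is enabled (labels and registers are
// illustrative):
//   div.d $a0, $a0, $a1
//   bnez  $a1, .LBB0_2   # divisor nonzero: skip the trap
//   break 7              # BRK_DIVZERO
// .LBB0_2: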
3378
3379static MachineBasicBlock *
3380emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
3381 const LoongArchSubtarget &Subtarget) {
3382 unsigned CondOpc;
3383 switch (MI.getOpcode()) {
3384 default:
3385 llvm_unreachable("Unexpected opcode");
3386 case LoongArch::PseudoVBZ:
3387 CondOpc = LoongArch::VSETEQZ_V;
3388 break;
3389 case LoongArch::PseudoVBZ_B:
3390 CondOpc = LoongArch::VSETANYEQZ_B;
3391 break;
3392 case LoongArch::PseudoVBZ_H:
3393 CondOpc = LoongArch::VSETANYEQZ_H;
3394 break;
3395 case LoongArch::PseudoVBZ_W:
3396 CondOpc = LoongArch::VSETANYEQZ_W;
3397 break;
3398 case LoongArch::PseudoVBZ_D:
3399 CondOpc = LoongArch::VSETANYEQZ_D;
3400 break;
3401 case LoongArch::PseudoVBNZ:
3402 CondOpc = LoongArch::VSETNEZ_V;
3403 break;
3404 case LoongArch::PseudoVBNZ_B:
3405 CondOpc = LoongArch::VSETALLNEZ_B;
3406 break;
3407 case LoongArch::PseudoVBNZ_H:
3408 CondOpc = LoongArch::VSETALLNEZ_H;
3409 break;
3410 case LoongArch::PseudoVBNZ_W:
3411 CondOpc = LoongArch::VSETALLNEZ_W;
3412 break;
3413 case LoongArch::PseudoVBNZ_D:
3414 CondOpc = LoongArch::VSETALLNEZ_D;
3415 break;
3416 case LoongArch::PseudoXVBZ:
3417 CondOpc = LoongArch::XVSETEQZ_V;
3418 break;
3419 case LoongArch::PseudoXVBZ_B:
3420 CondOpc = LoongArch::XVSETANYEQZ_B;
3421 break;
3422 case LoongArch::PseudoXVBZ_H:
3423 CondOpc = LoongArch::XVSETANYEQZ_H;
3424 break;
3425 case LoongArch::PseudoXVBZ_W:
3426 CondOpc = LoongArch::XVSETANYEQZ_W;
3427 break;
3428 case LoongArch::PseudoXVBZ_D:
3429 CondOpc = LoongArch::XVSETANYEQZ_D;
3430 break;
3431 case LoongArch::PseudoXVBNZ:
3432 CondOpc = LoongArch::XVSETNEZ_V;
3433 break;
3434 case LoongArch::PseudoXVBNZ_B:
3435 CondOpc = LoongArch::XVSETALLNEZ_B;
3436 break;
3437 case LoongArch::PseudoXVBNZ_H:
3438 CondOpc = LoongArch::XVSETALLNEZ_H;
3439 break;
3440 case LoongArch::PseudoXVBNZ_W:
3441 CondOpc = LoongArch::XVSETALLNEZ_W;
3442 break;
3443 case LoongArch::PseudoXVBNZ_D:
3444 CondOpc = LoongArch::XVSETALLNEZ_D;
3445 break;
3446 }
3447
3448 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3449 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3450 DebugLoc DL = MI.getDebugLoc();
3451 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3452 MachineFunction::iterator It = ++BB->getIterator();
3453
3454 MachineFunction *F = BB->getParent();
3455 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
3456 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
3457 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
3458
3459 F->insert(It, FalseBB);
3460 F->insert(It, TrueBB);
3461 F->insert(It, SinkBB);
3462
3463 // Transfer the remainder of BB and its successor edges to SinkBB.
3464 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
3465 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
3466
3467 // Insert the real instruction into BB.
3468 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
3469 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
3470
3471 // Insert branch.
3472 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
3473 BB->addSuccessor(FalseBB);
3474 BB->addSuccessor(TrueBB);
3475
3476 // FalseBB.
3477 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3478 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
3479 .addReg(LoongArch::R0)
3480 .addImm(0);
3481 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
3482 FalseBB->addSuccessor(SinkBB);
3483
3484 // TrueBB.
3485 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3486 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
3487 .addReg(LoongArch::R0)
3488 .addImm(1);
3489 TrueBB->addSuccessor(SinkBB);
3490
3491 // SinkBB: merge the results.
3492 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
3493 MI.getOperand(0).getReg())
3494 .addReg(RD1)
3495 .addMBB(FalseBB)
3496 .addReg(RD2)
3497 .addMBB(TrueBB);
3498
3499 // The pseudo instruction is gone now.
3500 MI.eraseFromParent();
3501 return SinkBB;
3502}
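// The diamond built above, in sketch form (block names as in the code,
// registers illustrative):
//   BB:      fcc = [x]vset* vr/xr ; bcnez fcc, TrueBB
//   FalseBB: rd1 = addi.w $zero, 0 ; b SinkBB
//   TrueBB:  rd2 = addi.w $zero, 1
//   SinkBB:  res = PHI(rd1 from FalseBB, rd2 from TrueBB)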
3503
3504static MachineBasicBlock *
3505emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
3506 const LoongArchSubtarget &Subtarget) {
3507 unsigned InsOp;
3508 unsigned HalfSize;
3509 switch (MI.getOpcode()) {
3510 default:
3511 llvm_unreachable("Unexpected opcode");
3512 case LoongArch::PseudoXVINSGR2VR_B:
3513 HalfSize = 16;
3514 InsOp = LoongArch::VINSGR2VR_B;
3515 break;
3516 case LoongArch::PseudoXVINSGR2VR_H:
3517 HalfSize = 8;
3518 InsOp = LoongArch::VINSGR2VR_H;
3519 break;
3520 }
3521 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3522 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
3523 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
3524 DebugLoc DL = MI.getDebugLoc();
3525 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3526 // XDst = vector_insert XSrc, Elt, Idx
3527 Register XDst = MI.getOperand(0).getReg();
3528 Register XSrc = MI.getOperand(1).getReg();
3529 Register Elt = MI.getOperand(2).getReg();
3530 unsigned Idx = MI.getOperand(3).getImm();
3531
3532 Register ScratchReg1 = XSrc;
3533 if (Idx >= HalfSize) {
3534 ScratchReg1 = MRI.createVirtualRegister(RC);
3535 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
3536 .addReg(XSrc)
3537 .addReg(XSrc)
3538 .addImm(1);
3539 }
3540
3541 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
3542 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
3543 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
3544 .addReg(ScratchReg1, 0, LoongArch::sub_128);
3545 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
3546 .addReg(ScratchSubReg1)
3547 .addReg(Elt)
3548 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
3549
3550 Register ScratchReg2 = XDst;
3551 if (Idx >= HalfSize)
3552 ScratchReg2 = MRI.createVirtualRegister(RC);
3553
3554 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
3555 .addImm(0)
3556 .addReg(ScratchSubReg2)
3557 .addImm(LoongArch::sub_128);
3558
3559 if (Idx >= HalfSize)
3560 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
3561 .addReg(XSrc)
3562 .addReg(ScratchReg2)
3563 .addImm(2);
3564
3565 MI.eraseFromParent();
3566 return BB;
3567}
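// In outline, an insert into the high 128-bit half (Idx >= HalfSize) becomes:
//   tmp  = xvpermi.q xsrc, xsrc, 1          ; high half -> low 128 bits
//   sub  = vinsgr2vr.{b,h} tmp, elt, Idx - HalfSize
//   xdst = xvpermi.q xsrc, sub, 2           ; updated half -> high 128 bits
// A low-half insert skips both XVPERMI_Q steps and inserts in place.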
3568
3569MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
3570 MachineInstr &MI, MachineBasicBlock *BB) const {
3571 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3572 DebugLoc DL = MI.getDebugLoc();
3573
3574 switch (MI.getOpcode()) {
3575 default:
3576 llvm_unreachable("Unexpected instr type to insert");
3577 case LoongArch::DIV_W:
3578 case LoongArch::DIV_WU:
3579 case LoongArch::MOD_W:
3580 case LoongArch::MOD_WU:
3581 case LoongArch::DIV_D:
3582 case LoongArch::DIV_DU:
3583 case LoongArch::MOD_D:
3584 case LoongArch::MOD_DU:
3585 return insertDivByZeroTrap(MI, BB);
3586 break;
3587 case LoongArch::WRFCSR: {
3588 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
3589 LoongArch::FCSR0 + MI.getOperand(0).getImm())
3590 .addReg(MI.getOperand(1).getReg());
3591 MI.eraseFromParent();
3592 return BB;
3593 }
3594 case LoongArch::RDFCSR: {
3595 MachineInstr *ReadFCSR =
3596 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
3597 MI.getOperand(0).getReg())
3598 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
3599 ReadFCSR->getOperand(1).setIsUndef();
3600 MI.eraseFromParent();
3601 return BB;
3602 }
3603 case LoongArch::PseudoVBZ:
3604 case LoongArch::PseudoVBZ_B:
3605 case LoongArch::PseudoVBZ_H:
3606 case LoongArch::PseudoVBZ_W:
3607 case LoongArch::PseudoVBZ_D:
3608 case LoongArch::PseudoVBNZ:
3609 case LoongArch::PseudoVBNZ_B:
3610 case LoongArch::PseudoVBNZ_H:
3611 case LoongArch::PseudoVBNZ_W:
3612 case LoongArch::PseudoVBNZ_D:
3613 case LoongArch::PseudoXVBZ:
3614 case LoongArch::PseudoXVBZ_B:
3615 case LoongArch::PseudoXVBZ_H:
3616 case LoongArch::PseudoXVBZ_W:
3617 case LoongArch::PseudoXVBZ_D:
3618 case LoongArch::PseudoXVBNZ:
3619 case LoongArch::PseudoXVBNZ_B:
3620 case LoongArch::PseudoXVBNZ_H:
3621 case LoongArch::PseudoXVBNZ_W:
3622 case LoongArch::PseudoXVBNZ_D:
3623 return emitVecCondBranchPseudo(MI, BB, Subtarget);
3624 case LoongArch::PseudoXVINSGR2VR_B:
3625 case LoongArch::PseudoXVINSGR2VR_H:
3626 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
3627 }
3628}
3629
3630bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
3631 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3632 unsigned *Fast) const {
3633 if (!Subtarget.hasUAL())
3634 return false;
3635
3636 // TODO: set reasonable speed number.
3637 if (Fast)
3638 *Fast = 1;
3639 return true;
3640}
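// That is: with the UAL (unaligned access) feature every misaligned access
// is reported as legal and, for now, uniformly "fast"; without it the
// generic legalizer expands misaligned loads and stores instead.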
3641
3642const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
3643 switch ((LoongArchISD::NodeType)Opcode) {
3644 case LoongArchISD::FIRST_NUMBER:
3645 break;
3646
3647#define NODE_NAME_CASE(node) \
3648 case LoongArchISD::node: \
3649 return "LoongArchISD::" #node;
3650
3651 // TODO: Add more target-dependent nodes later.
3652 NODE_NAME_CASE(CALL)
3653 NODE_NAME_CASE(CALL_MEDIUM)
3654 NODE_NAME_CASE(CALL_LARGE)
3655 NODE_NAME_CASE(RET)
3656 NODE_NAME_CASE(TAIL)
3657 NODE_NAME_CASE(TAIL_MEDIUM)
3658 NODE_NAME_CASE(TAIL_LARGE)
3659 NODE_NAME_CASE(SLL_W)
3660 NODE_NAME_CASE(SRA_W)
3661 NODE_NAME_CASE(SRL_W)
3662 NODE_NAME_CASE(BSTRINS)
3663 NODE_NAME_CASE(BSTRPICK)
3664 NODE_NAME_CASE(MOVGR2FR_W_LA64)
3665 NODE_NAME_CASE(MOVFR2GR_S_LA64)
3666 NODE_NAME_CASE(FTINT)
3667 NODE_NAME_CASE(REVB_2H)
3668 NODE_NAME_CASE(REVB_2W)
3669 NODE_NAME_CASE(BITREV_4B)
3670 NODE_NAME_CASE(BITREV_W)
3671 NODE_NAME_CASE(ROTR_W)
3672 NODE_NAME_CASE(ROTL_W)
3673 NODE_NAME_CASE(DIV_WU)
3674 NODE_NAME_CASE(MOD_WU)
3675 NODE_NAME_CASE(CLZ_W)
3676 NODE_NAME_CASE(CTZ_W)
3677 NODE_NAME_CASE(DBAR)
3678 NODE_NAME_CASE(IBAR)
3679 NODE_NAME_CASE(BREAK)
3680 NODE_NAME_CASE(SYSCALL)
3681 NODE_NAME_CASE(CRC_W_B_W)
3682 NODE_NAME_CASE(CRC_W_H_W)
3683 NODE_NAME_CASE(CRC_W_W_W)
3684 NODE_NAME_CASE(CRC_W_D_W)
3685 NODE_NAME_CASE(CRCC_W_B_W)
3686 NODE_NAME_CASE(CRCC_W_H_W)
3687 NODE_NAME_CASE(CRCC_W_W_W)
3688 NODE_NAME_CASE(CRCC_W_D_W)
3689 NODE_NAME_CASE(CSRRD)
3690 NODE_NAME_CASE(CSRWR)
3691 NODE_NAME_CASE(CSRXCHG)
3692 NODE_NAME_CASE(IOCSRRD_B)
3693 NODE_NAME_CASE(IOCSRRD_H)
3694 NODE_NAME_CASE(IOCSRRD_W)
3695 NODE_NAME_CASE(IOCSRRD_D)
3696 NODE_NAME_CASE(IOCSRWR_B)
3697 NODE_NAME_CASE(IOCSRWR_H)
3698 NODE_NAME_CASE(IOCSRWR_W)
3699 NODE_NAME_CASE(IOCSRWR_D)
3700 NODE_NAME_CASE(CPUCFG)
3701 NODE_NAME_CASE(MOVGR2FCSR)
3702 NODE_NAME_CASE(MOVFCSR2GR)
3703 NODE_NAME_CASE(CACOP_D)
3704 NODE_NAME_CASE(CACOP_W)
3705 NODE_NAME_CASE(VPICK_SEXT_ELT)
3706 NODE_NAME_CASE(VPICK_ZEXT_ELT)
3707 NODE_NAME_CASE(VREPLVE)
3708 NODE_NAME_CASE(VALL_ZERO)
3709 NODE_NAME_CASE(VANY_ZERO)
3710 NODE_NAME_CASE(VALL_NONZERO)
3711 NODE_NAME_CASE(VANY_NONZERO)
3712 }
3713#undef NODE_NAME_CASE
3714 return nullptr;
3715}
3716
3717//===----------------------------------------------------------------------===//
3718// Calling Convention Implementation
3719//===----------------------------------------------------------------------===//
3720
3721 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
3722// with a0-a1 reused to return values. Generally, the GPRs are used to pass
3723// fixed-point arguments, and floating-point arguments when no FPR is available
3724// or with soft float ABI.
3725const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
3726 LoongArch::R7, LoongArch::R8, LoongArch::R9,
3727 LoongArch::R10, LoongArch::R11};
3728 // Eight floating-point registers fa0-fa7 are used for passing floating-point
3729// arguments, and fa0-fa1 are also used to return values.
3730const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
3731 LoongArch::F3, LoongArch::F4, LoongArch::F5,
3732 LoongArch::F6, LoongArch::F7};
3733// FPR32 and FPR64 alias each other.
3734const MCPhysReg ArgFPR64s[] = {
3735 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
3736 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
3737
3738const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
3739 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
3740 LoongArch::VR6, LoongArch::VR7};
3741
3742const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
3743 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
3744 LoongArch::XR6, LoongArch::XR7};
3745
3746// Pass a 2*GRLen argument that has been split into two GRLen values through
3747// registers or the stack as necessary.
3748static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
3749 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
3750 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
3751 ISD::ArgFlagsTy ArgFlags2) {
3752 unsigned GRLenInBytes = GRLen / 8;
3753 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3754 // At least one half can be passed via register.
3755 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3756 VA1.getLocVT(), CCValAssign::Full));
3757 } else {
3758 // Both halves must be passed on the stack, with proper alignment.
3759 Align StackAlign =
3760 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3761 State.addLoc(
3762 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3763 State.AllocateStack(GRLenInBytes, StackAlign),
3764 VA1.getLocVT(), CCValAssign::Full));
3765 State.addLoc(CCValAssign::getMem(
3766 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3767 LocVT2, CCValAssign::Full));
3768 return false;
3769 }
3770 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3771 // The second half can also be passed via register.
3772 State.addLoc(
3773 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3774 } else {
3775 // The second half is passed via the stack, without additional alignment.
3776 State.addLoc(CCValAssign::getMem(
3777 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3778 LocVT2, CCValAssign::Full));
3779 }
3780 return false;
3781}
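// For example (hypothetical, LA64): an i128 argument is split into two i64
// halves. If one GPR remains, the first half takes it and the second goes on
// the stack; if none remain, both halves go on the stack with the alignment
// computed above.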
3782
3783// Implements the LoongArch calling convention. Returns true upon failure.
3784static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
3785 unsigned ValNo, MVT ValVT,
3786 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
3787 CCState &State, bool IsFixed, bool IsRet,
3788 Type *OrigTy) {
3789 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
3790 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
3791 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
3792 MVT LocVT = ValVT;
3793
3794 // Any return value split into more than two values can't be returned
3795 // directly.
3796 if (IsRet && ValNo > 1)
3797 return true;
3798
3799 // Use GPRs when the ABI is soft-float, the argument is variadic, or no FPR is free.
3800 bool UseGPRForFloat = true;
3801
3802 switch (ABI) {
3803 default:
3804 llvm_unreachable("Unexpected ABI");
3805 break;
3806 case LoongArchABI::ABI_ILP32F:
3807 case LoongArchABI::ABI_LP64F:
3808 case LoongArchABI::ABI_ILP32D:
3809 case LoongArchABI::ABI_LP64D:
3810 UseGPRForFloat = !IsFixed;
3811 break;
3812 case LoongArchABI::ABI_ILP32S:
3813 case LoongArchABI::ABI_LP64S:
3814 break;
3815 }
3816
3817 // FPR32 and FPR64 alias each other.
3818 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
3819 UseGPRForFloat = true;
3820
3821 if (UseGPRForFloat && ValVT == MVT::f32) {
3822 LocVT = GRLenVT;
3823 LocInfo = CCValAssign::BCvt;
3824 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
3825 LocVT = MVT::i64;
3826 LocInfo = CCValAssign::BCvt;
3827 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
3828 // TODO: Handle passing f64 on LA32 with D feature.
3829 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
3830 }
3831
3832 // If this is a variadic argument, the LoongArch calling convention requires
3833 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
3834 // byte alignment. An aligned register should be used regardless of whether
3835 // the original argument was split during legalisation or not. The argument
3836 // will not be passed by registers if the original type is larger than
3837 // 2*GRLen, so the register alignment rule does not apply.
3838 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
3839 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
3840 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
3841 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3842 // Skip 'odd' register if necessary.
3843 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
3844 State.AllocateReg(ArgGPRs);
3845 }
3846
3847 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3848 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3849 State.getPendingArgFlags();
3850
3851 assert(PendingLocs.size() == PendingArgFlags.size() &&
3852 "PendingLocs and PendingArgFlags out of sync");
3853
3854 // Split arguments might be passed indirectly, so keep track of the pending
3855 // values.
3856 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
3857 LocVT = GRLenVT;
3858 LocInfo = CCValAssign::Indirect;
3859 PendingLocs.push_back(
3860 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3861 PendingArgFlags.push_back(ArgFlags);
3862 if (!ArgFlags.isSplitEnd()) {
3863 return false;
3864 }
3865 }
3866
3867 // If the split argument only had two elements, it should be passed directly
3868 // in registers or on the stack.
3869 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
3870 PendingLocs.size() <= 2) {
3871 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3872 // Apply the normal calling convention rules to the first half of the
3873 // split argument.
3874 CCValAssign VA = PendingLocs[0];
3875 ISD::ArgFlagsTy AF = PendingArgFlags[0];
3876 PendingLocs.clear();
3877 PendingArgFlags.clear();
3878 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
3879 ArgFlags);
3880 }
3881
3882 // Allocate to a register if possible, or else a stack slot.
3883 Register Reg;
3884 unsigned StoreSizeBytes = GRLen / 8;
3885 Align StackAlign = Align(GRLen / 8);
3886
3887 if (ValVT == MVT::f32 && !UseGPRForFloat)
3888 Reg = State.AllocateReg(ArgFPR32s);
3889 else if (ValVT == MVT::f64 && !UseGPRForFloat)
3890 Reg = State.AllocateReg(ArgFPR64s);
3891 else if (ValVT.is128BitVector())
3892 Reg = State.AllocateReg(ArgVRs);
3893 else if (ValVT.is256BitVector())
3894 Reg = State.AllocateReg(ArgXRs);
3895 else
3896 Reg = State.AllocateReg(ArgGPRs);
3897
3898 unsigned StackOffset =
3899 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
3900
3901 // If we reach this point and PendingLocs is non-empty, we must be at the
3902 // end of a split argument that must be passed indirectly.
3903 if (!PendingLocs.empty()) {
3904 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3905 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3906 for (auto &It : PendingLocs) {
3907 if (Reg)
3908 It.convertToReg(Reg);
3909 else
3910 It.convertToMem(StackOffset);
3911 State.addLoc(It);
3912 }
3913 PendingLocs.clear();
3914 PendingArgFlags.clear();
3915 return false;
3916 }
3917 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
3918 "Expected an GRLenVT at this stage");
3919
3920 if (Reg) {
3921 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3922 return false;
3923 }
3924
3925 // When a floating-point value is passed on the stack, no bit-cast is needed.
3926 if (ValVT.isFloatingPoint()) {
3927 LocVT = ValVT;
3928 LocInfo = CCValAssign::Full;
3929 }
3930
3931 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3932 return false;
3933}
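// Worked example of the variadic alignment rule above (hypothetical C,
// ILP32): for `int f(int x, ...)` called as `f(0, 1.0)`, the variadic double
// is 2*GRLen wide and 2*GRLen/8-byte aligned, so when the next free GPR is
// odd-numbered (a1, after x takes a0) it is skipped and the double is passed
// in the aligned pair a2/a3.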
3934
3935void LoongArchTargetLowering::analyzeInputArgs(
3936 MachineFunction &MF, CCState &CCInfo,
3937 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
3938 LoongArchCCAssignFn Fn) const {
3939 FunctionType *FType = MF.getFunction().getFunctionType();
3940 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3941 MVT ArgVT = Ins[i].VT;
3942 Type *ArgTy = nullptr;
3943 if (IsRet)
3944 ArgTy = FType->getReturnType();
3945 else if (Ins[i].isOrigArg())
3946 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3947 LoongArchABI::ABI ABI =
3948 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3949 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
3950 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
3951 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
3952 << '\n');
3953 llvm_unreachable("");
3954 }
3955 }
3956}
3957
3958void LoongArchTargetLowering::analyzeOutputArgs(
3959 MachineFunction &MF, CCState &CCInfo,
3960 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3961 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
3962 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3963 MVT ArgVT = Outs[i].VT;
3964 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3965 LoongArchABI::ABI ABI =
3966 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3967 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
3968 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
3969 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
3970 << "\n");
3971 llvm_unreachable("");
3972 }
3973 }
3974}
3975
3976// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3977// values.
3978static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3979 const CCValAssign &VA, const SDLoc &DL) {
3980 switch (VA.getLocInfo()) {
3981 default:
3982 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3983 case CCValAssign::Full:
3984 case CCValAssign::Indirect:
3985 break;
3986 case CCValAssign::BCvt:
3987 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3988 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
3989 else
3990 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3991 break;
3992 }
3993 return Val;
3994}
3995
3996static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3997 const CCValAssign &VA, const SDLoc &DL,
3998 const ISD::InputArg &In,
3999 const LoongArchTargetLowering &TLI) {
4000 MachineFunction &MF = DAG.getMachineFunction();
4001 MachineRegisterInfo &RegInfo = MF.getRegInfo();
4002 EVT LocVT = VA.getLocVT();
4003 SDValue Val;
4004 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
4005 Register VReg = RegInfo.createVirtualRegister(RC);
4006 RegInfo.addLiveIn(VA.getLocReg(), VReg);
4007 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
4008
4009 // If input is sign extended from 32 bits, note it for the OptW pass.
4010 if (In.isOrigArg()) {
4011 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
4012 if (OrigArg->getType()->isIntegerTy()) {
4013 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
4014 // An input zero extended from i31 can also be considered sign extended.
4015 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
4016 (BitWidth < 32 && In.Flags.isZExt())) {
4017 LoongArchMachineFunctionInfo *LAFI =
4018 MF.getInfo<LoongArchMachineFunctionInfo>();
4019 LAFI->addSExt32Register(VReg);
4020 }
4021 }
4022 }
4023
4024 return convertLocVTToValVT(DAG, Val, VA, DL);
4025}
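// e.g. (hypothetical): an i32 argument marked `signext` on LA64 arrives
// already sign-extended in its 64-bit GPR; recording the vreg here lets the
// OptW pass delete redundant 32-bit re-extensions later.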
4026
4027// The caller is responsible for loading the full value if the argument is
4028// passed with CCValAssign::Indirect.
4029static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
4030 const CCValAssign &VA, const SDLoc &DL) {
4031 MachineFunction &MF = DAG.getMachineFunction();
4032 MachineFrameInfo &MFI = MF.getFrameInfo();
4033 EVT ValVT = VA.getValVT();
4034 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
4035 /*IsImmutable=*/true);
4036 SDValue FIN = DAG.getFrameIndex(
4037 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
4038
4039 ISD::LoadExtType ExtType;
4040 switch (VA.getLocInfo()) {
4041 default:
4042 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4043 case CCValAssign::Full:
4044 case CCValAssign::Indirect:
4045 case CCValAssign::BCvt:
4046 ExtType = ISD::NON_EXTLOAD;
4047 break;
4048 }
4049 return DAG.getExtLoad(
4050 ExtType, DL, VA.getLocVT(), Chain, FIN,
4051 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
4052}
4053
4054static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
4055 const CCValAssign &VA, const SDLoc &DL) {
4056 EVT LocVT = VA.getLocVT();
4057
4058 switch (VA.getLocInfo()) {
4059 default:
4060 llvm_unreachable("Unexpected CCValAssign::LocInfo");
4061 case CCValAssign::Full:
4062 break;
4063 case CCValAssign::BCvt:
4064 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4065 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
4066 else
4067 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
4068 break;
4069 }
4070 return Val;
4071}
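// Sketch of the BCvt round trip: an f32 bound to a 64-bit GPR slot goes
// through MOVFR2GR_S_LA64 here and back through MOVGR2FR_W_LA64 in
// convertLocVTToValVT, because a direct i64 <-> f32 BITCAST would be
// ill-typed (the bit widths differ).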
4072
4073static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
4074 CCValAssign::LocInfo LocInfo,
4075 ISD::ArgFlagsTy ArgFlags, CCState &State) {
4076 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
4077 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
4078 // s0 s1 s2 s3 s4 s5 s6 s7 s8
4079 static const MCPhysReg GPRList[] = {
4080 LoongArch::R23, LoongArch::R24, LoongArch::R25,
4081 LoongArch::R26, LoongArch::R27, LoongArch::R28,
4082 LoongArch::R29, LoongArch::R30, LoongArch::R31};
4083 if (unsigned Reg = State.AllocateReg(GPRList)) {
4084 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4085 return false;
4086 }
4087 }
4088
4089 if (LocVT == MVT::f32) {
4090 // Pass in STG registers: F1, F2, F3, F4
4091 // fs0,fs1,fs2,fs3
4092 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
4093 LoongArch::F26, LoongArch::F27};
4094 if (unsigned Reg = State.AllocateReg(FPR32List)) {
4095 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4096 return false;
4097 }
4098 }
4099
4100 if (LocVT == MVT::f64) {
4101 // Pass in STG registers: D1, D2, D3, D4
4102 // fs4,fs5,fs6,fs7
4103 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
4104 LoongArch::F30_64, LoongArch::F31_64};
4105 if (unsigned Reg = State.AllocateReg(FPR64List)) {
4106 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4107 return false;
4108 }
4109 }
4110
4111 report_fatal_error("No registers left in GHC calling convention");
4112 return true;
4113}
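// The GHC convention pins STG virtual registers to fixed callee-saved
// machine registers, so there is deliberately no stack fallback: exhausting
// the register lists above is a hard error rather than a spill.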
4114
4115// Transform physical registers into virtual registers.
4116SDValue LoongArchTargetLowering::LowerFormalArguments(
4117 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4118 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
4119 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4120
4121 MachineFunction &MF = DAG.getMachineFunction();
4122
4123 switch (CallConv) {
4124 default:
4125 llvm_unreachable("Unsupported calling convention");
4126 case CallingConv::C:
4127 case CallingConv::Fast:
4128 break;
4129 case CallingConv::GHC:
4130 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
4131 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
4133 "GHC calling convention requires the F and D extensions");
4134 }
4135
4136 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4137 MVT GRLenVT = Subtarget.getGRLenVT();
4138 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
4139 // Used with varargs to accumulate store chains.
4140 std::vector<SDValue> OutChains;
4141
4142 // Assign locations to all of the incoming arguments.
4143 SmallVector<CCValAssign> ArgLocs;
4144 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4145
4146 if (CallConv == CallingConv::GHC)
4147 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
4148 else
4149 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
4150
4151 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4152 CCValAssign &VA = ArgLocs[i];
4153 SDValue ArgValue;
4154 if (VA.isRegLoc())
4155 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
4156 else
4157 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
4158 if (VA.getLocInfo() == CCValAssign::Indirect) {
4159 // If the original argument was split and passed by reference, we need to
4160 // load all parts of it here (using the same address).
4161 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
4162 MachinePointerInfo()));
4163 unsigned ArgIndex = Ins[i].OrigArgIndex;
4164 unsigned ArgPartOffset = Ins[i].PartOffset;
4165 assert(ArgPartOffset == 0);
4166 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
4167 CCValAssign &PartVA = ArgLocs[i + 1];
4168 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
4169 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
4170 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
4171 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
4172 MachinePointerInfo()));
4173 ++i;
4174 }
4175 continue;
4176 }
4177 InVals.push_back(ArgValue);
4178 }
4179
4180 if (IsVarArg) {
4181 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
4182 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
4183 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
4184 MachineFrameInfo &MFI = MF.getFrameInfo();
4185 MachineRegisterInfo &RegInfo = MF.getRegInfo();
4186 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
4187
4188 // Offset of the first variable argument from stack pointer, and size of
4189 // the vararg save area. For now, the varargs save area is either zero or
4190 // large enough to hold a0-a7.
4191 int VaArgOffset, VarArgsSaveSize;
4192
4193 // If all registers are allocated, then all varargs must be passed on the
4194 // stack and we don't need to save any argregs.
4195 if (ArgRegs.size() == Idx) {
4196 VaArgOffset = CCInfo.getStackSize();
4197 VarArgsSaveSize = 0;
4198 } else {
4199 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
4200 VaArgOffset = -VarArgsSaveSize;
4201 }
4202
4203 // Record the frame index of the first variable argument
4204 // which is the value needed by VASTART.
4205 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
4206 LoongArchFI->setVarArgsFrameIndex(FI);
4207
4208 // If saving an odd number of registers then create an extra stack slot to
4209 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
4210 // offsets to even-numbered registers remain 2*GRLen-aligned.
4211 if (